blob: e5fd601a2d91a2109df398df2217df915609c75e [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
2 * kmp_lock.cpp -- lock-related functions
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include <stddef.h>
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +000017#include <atomic>
Jim Cownie5e8470a2013-09-27 10:38:44 +000018
19#include "kmp.h"
20#include "kmp_itt.h"
21#include "kmp_i18n.h"
22#include "kmp_lock.h"
23#include "kmp_io.h"
24
Paul Osmialowskifb043fd2016-05-16 09:44:11 +000025#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +000026# include <unistd.h>
27# include <sys/syscall.h>
28// We should really include <futex.h>, but that causes compatibility problems on different
29// Linux* OS distributions that either require that you include (or break when you try to include)
30// <pci/types.h>.
31// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
32// we just define the constants here and don't include <futex.h>
33# ifndef FUTEX_WAIT
34# define FUTEX_WAIT 0
35# endif
36# ifndef FUTEX_WAKE
37# define FUTEX_WAKE 1
38# endif
39#endif
40
Jim Cownie5e8470a2013-09-27 10:38:44 +000041/* Implement spin locks for internal library use. */
42/* The algorithm implemented is Lamport's bakery lock [1974]. */
43
44void
45__kmp_validate_locks( void )
46{
47 int i;
48 kmp_uint32 x, y;
49
50 /* Check to make sure unsigned arithmetic does wraps properly */
51 x = ~((kmp_uint32) 0) - 2;
52 y = x - 2;
53
54 for (i = 0; i < 8; ++i, ++x, ++y) {
55 kmp_uint32 z = (x - y);
56 KMP_ASSERT( z == 2 );
57 }
58
59 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
60}
61
62
63/* ------------------------------------------------------------------------ */
64/* test and set locks */
65
66//
67// For the non-nested locks, we can only assume that the first 4 bytes were
68// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
69// compiler only allocates a 4 byte pointer on IA-32 architecture. On
70// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
71//
72// gcc reserves >= 8 bytes for nested locks, so we can assume that the
73// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
74//
75
76static kmp_int32
77__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
78{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +000079 return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +000080}
81
82static inline bool
83__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
84{
85 return lck->lk.depth_locked != -1;
86}
87
Jonathan Peyton0e6d4572015-10-16 16:52:58 +000088__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +000089__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
90{
91 KMP_MB();
92
93#ifdef USE_LOCK_PROFILE
94 kmp_uint32 curr = TCR_4( lck->lk.poll );
95 if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
96 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
97 /* else __kmp_printf( "." );*/
98#endif /* USE_LOCK_PROFILE */
99
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000100 if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
101 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000102 KMP_FSYNC_ACQUIRED(lck);
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000103 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000104 }
105
106 kmp_uint32 spins;
107 KMP_FSYNC_PREPARE( lck );
108 KMP_INIT_YIELD( spins );
109 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
110 __kmp_xproc ) ) {
111 KMP_YIELD( TRUE );
112 }
113 else {
114 KMP_YIELD_SPIN( spins );
115 }
116
Jonathan Peyton377aa402016-04-14 16:00:37 +0000117 kmp_backoff_t backoff = __kmp_spin_backoff_params;
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000118 while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
119 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
Jonathan Peyton377aa402016-04-14 16:00:37 +0000120
121 __kmp_spin_backoff(&backoff);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000122 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
123 __kmp_xproc ) ) {
124 KMP_YIELD( TRUE );
125 }
126 else {
127 KMP_YIELD_SPIN( spins );
128 }
129 }
130 KMP_FSYNC_ACQUIRED( lck );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000131 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000132}
133
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000134int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000135__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
136{
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000137 return __kmp_acquire_tas_lock_timed_template( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000138}
139
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000140static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000141__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
142{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000143 char const * const func = "omp_set_lock";
144 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
145 && __kmp_is_tas_lock_nestable( lck ) ) {
146 KMP_FATAL( LockNestableUsedAsSimple, func );
147 }
148 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
149 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000150 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000151 return __kmp_acquire_tas_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000152}
153
154int
155__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
156{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000157 if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
158 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000159 KMP_FSYNC_ACQUIRED( lck );
160 return TRUE;
161 }
162 return FALSE;
163}
164
165static int
166__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
167{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000168 char const * const func = "omp_test_lock";
169 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
170 && __kmp_is_tas_lock_nestable( lck ) ) {
171 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000172 }
173 return __kmp_test_tas_lock( lck, gtid );
174}
175
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000176int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000177__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
178{
179 KMP_MB(); /* Flush all pending memory write invalidates. */
180
181 KMP_FSYNC_RELEASING(lck);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000182 KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000183 KMP_MB(); /* Flush all pending memory write invalidates. */
184
185 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
186 __kmp_xproc ) );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000187 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000188}
189
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000190static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000191__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
192{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000193 char const * const func = "omp_unset_lock";
194 KMP_MB(); /* in case another processor initialized lock */
195 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
196 && __kmp_is_tas_lock_nestable( lck ) ) {
197 KMP_FATAL( LockNestableUsedAsSimple, func );
198 }
199 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
200 KMP_FATAL( LockUnsettingFree, func );
201 }
202 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
203 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
204 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000205 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000206 return __kmp_release_tas_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000207}
208
209void
210__kmp_init_tas_lock( kmp_tas_lock_t * lck )
211{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000212 TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000213}
214
215static void
216__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
217{
218 __kmp_init_tas_lock( lck );
219}
220
221void
222__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
223{
224 lck->lk.poll = 0;
225}
226
227static void
228__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
229{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000230 char const * const func = "omp_destroy_lock";
231 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
232 && __kmp_is_tas_lock_nestable( lck ) ) {
233 KMP_FATAL( LockNestableUsedAsSimple, func );
234 }
235 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
236 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000237 }
238 __kmp_destroy_tas_lock( lck );
239}
240
241
242//
243// nested test and set locks
244//
245
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000246int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000247__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
248{
249 KMP_DEBUG_ASSERT( gtid >= 0 );
250
251 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
252 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000253 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000254 }
255 else {
256 __kmp_acquire_tas_lock_timed_template( lck, gtid );
257 lck->lk.depth_locked = 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000258 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000259 }
260}
261
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000262static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000263__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
264{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000265 char const * const func = "omp_set_nest_lock";
266 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
267 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000268 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000269 return __kmp_acquire_nested_tas_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000270}
271
272int
273__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
274{
275 int retval;
276
277 KMP_DEBUG_ASSERT( gtid >= 0 );
278
279 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
280 retval = ++lck->lk.depth_locked;
281 }
282 else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
283 retval = 0;
284 }
285 else {
286 KMP_MB();
287 retval = lck->lk.depth_locked = 1;
288 }
289 return retval;
290}
291
292static int
293__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
294{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000295 char const * const func = "omp_test_nest_lock";
296 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
297 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000298 }
299 return __kmp_test_nested_tas_lock( lck, gtid );
300}
301
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000302int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000303__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
304{
305 KMP_DEBUG_ASSERT( gtid >= 0 );
306
307 KMP_MB();
308 if ( --(lck->lk.depth_locked) == 0 ) {
309 __kmp_release_tas_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000310 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000311 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000312 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000313}
314
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000315static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000316__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
317{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000318 char const * const func = "omp_unset_nest_lock";
319 KMP_MB(); /* in case another processor initialized lock */
320 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
321 KMP_FATAL( LockSimpleUsedAsNestable, func );
322 }
323 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
324 KMP_FATAL( LockUnsettingFree, func );
325 }
326 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
327 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000328 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000329 return __kmp_release_nested_tas_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000330}
331
332void
333__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
334{
335 __kmp_init_tas_lock( lck );
336 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
337}
338
339static void
340__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
341{
342 __kmp_init_nested_tas_lock( lck );
343}
344
345void
346__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
347{
348 __kmp_destroy_tas_lock( lck );
349 lck->lk.depth_locked = 0;
350}
351
352static void
353__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
354{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000355 char const * const func = "omp_destroy_nest_lock";
356 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
357 KMP_FATAL( LockSimpleUsedAsNestable, func );
358 }
359 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
360 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000361 }
362 __kmp_destroy_nested_tas_lock( lck );
363}
364
365
Paul Osmialowskifb043fd2016-05-16 09:44:11 +0000366#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000367
368/* ------------------------------------------------------------------------ */
369/* futex locks */
370
371// futex locks are really just test and set locks, with a different method
372// of handling contention. They take the same amount of space as test and
373// set locks, and are allocated the same way (i.e. use the area allocated by
374// the compiler for non-nested locks / allocate nested locks on the heap).
375
376static kmp_int32
377__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
378{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000379 return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000380}
381
382static inline bool
383__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
384{
385 return lck->lk.depth_locked != -1;
386}
387
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000388__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000389__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
390{
391 kmp_int32 gtid_code = ( gtid + 1 ) << 1;
392
393 KMP_MB();
394
395#ifdef USE_LOCK_PROFILE
396 kmp_uint32 curr = TCR_4( lck->lk.poll );
397 if ( ( curr != 0 ) && ( curr != gtid_code ) )
398 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
399 /* else __kmp_printf( "." );*/
400#endif /* USE_LOCK_PROFILE */
401
402 KMP_FSYNC_PREPARE( lck );
403 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
404 lck, lck->lk.poll, gtid ) );
405
406 kmp_int32 poll_val;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000407
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000408 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
409 KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +0000410
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000411 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000412 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
413 lck, gtid, poll_val, cond ) );
414
415 //
416 // NOTE: if you try to use the following condition for this branch
417 //
418 // if ( poll_val & 1 == 0 )
419 //
420 // Then the 12.0 compiler has a bug where the following block will
421 // always be skipped, regardless of the value of the LSB of poll_val.
422 //
423 if ( ! cond ) {
424 //
425 // Try to set the lsb in the poll to indicate to the owner
426 // thread that they need to wake this thread up.
427 //
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000428 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000429 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
430 lck, lck->lk.poll, gtid ) );
431 continue;
432 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000433 poll_val |= KMP_LOCK_BUSY(1, futex);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000434
435 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
436 lck, lck->lk.poll, gtid ) );
437 }
438
439 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
440 lck, gtid, poll_val ) );
441
442 kmp_int32 rc;
443 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
444 poll_val, NULL, NULL, 0 ) ) != 0 ) {
445 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
446 lck, gtid, poll_val, rc, errno ) );
447 continue;
448 }
449
450 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
451 lck, gtid, poll_val ) );
452 //
Alp Toker8f2d3f02014-02-24 10:40:15 +0000453 // This thread has now done a successful futex wait call and was
Jim Cownie5e8470a2013-09-27 10:38:44 +0000454 // entered on the OS futex queue. We must now perform a futex
455 // wake call when releasing the lock, as we have no idea how many
456 // other threads are in the queue.
457 //
458 gtid_code |= 1;
459 }
460
461 KMP_FSYNC_ACQUIRED( lck );
462 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
463 lck, lck->lk.poll, gtid ) );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000464 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000465}
466
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000467int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000468__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
469{
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000470 return __kmp_acquire_futex_lock_timed_template( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000471}
472
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000473static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000474__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
475{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000476 char const * const func = "omp_set_lock";
477 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
478 && __kmp_is_futex_lock_nestable( lck ) ) {
479 KMP_FATAL( LockNestableUsedAsSimple, func );
480 }
481 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
482 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000483 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000484 return __kmp_acquire_futex_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000485}
486
487int
488__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
489{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000490 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000491 KMP_FSYNC_ACQUIRED( lck );
492 return TRUE;
493 }
494 return FALSE;
495}
496
497static int
498__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
499{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000500 char const * const func = "omp_test_lock";
501 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
502 && __kmp_is_futex_lock_nestable( lck ) ) {
503 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000504 }
505 return __kmp_test_futex_lock( lck, gtid );
506}
507
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000508int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000509__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
510{
511 KMP_MB(); /* Flush all pending memory write invalidates. */
512
513 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
514 lck, lck->lk.poll, gtid ) );
515
516 KMP_FSYNC_RELEASING(lck);
517
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000518 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000519
520 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
521 lck, gtid, poll_val ) );
522
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000523 if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000524 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
525 lck, gtid ) );
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000526 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000527 }
528
529 KMP_MB(); /* Flush all pending memory write invalidates. */
530
531 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
532 lck, lck->lk.poll, gtid ) );
533
534 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
535 __kmp_xproc ) );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000536 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000537}
538
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000539static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000540__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
541{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000542 char const * const func = "omp_unset_lock";
543 KMP_MB(); /* in case another processor initialized lock */
544 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
545 && __kmp_is_futex_lock_nestable( lck ) ) {
546 KMP_FATAL( LockNestableUsedAsSimple, func );
547 }
548 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
549 KMP_FATAL( LockUnsettingFree, func );
550 }
551 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
552 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
553 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000554 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000555 return __kmp_release_futex_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000556}
557
558void
559__kmp_init_futex_lock( kmp_futex_lock_t * lck )
560{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +0000561 TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000562}
563
564static void
565__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
566{
567 __kmp_init_futex_lock( lck );
568}
569
570void
571__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
572{
573 lck->lk.poll = 0;
574}
575
576static void
577__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
578{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000579 char const * const func = "omp_destroy_lock";
580 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
581 && __kmp_is_futex_lock_nestable( lck ) ) {
582 KMP_FATAL( LockNestableUsedAsSimple, func );
583 }
584 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
585 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000586 }
587 __kmp_destroy_futex_lock( lck );
588}
589
590
591//
592// nested futex locks
593//
594
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000595int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000596__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
597{
598 KMP_DEBUG_ASSERT( gtid >= 0 );
599
600 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
601 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000602 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000603 }
604 else {
605 __kmp_acquire_futex_lock_timed_template( lck, gtid );
606 lck->lk.depth_locked = 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000607 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000608 }
609}
610
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000611static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000612__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
613{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000614 char const * const func = "omp_set_nest_lock";
615 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
616 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000617 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000618 return __kmp_acquire_nested_futex_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000619}
620
621int
622__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
623{
624 int retval;
625
626 KMP_DEBUG_ASSERT( gtid >= 0 );
627
628 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
629 retval = ++lck->lk.depth_locked;
630 }
631 else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
632 retval = 0;
633 }
634 else {
635 KMP_MB();
636 retval = lck->lk.depth_locked = 1;
637 }
638 return retval;
639}
640
641static int
642__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
643{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000644 char const * const func = "omp_test_nest_lock";
645 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
646 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000647 }
648 return __kmp_test_nested_futex_lock( lck, gtid );
649}
650
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000651int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000652__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
653{
654 KMP_DEBUG_ASSERT( gtid >= 0 );
655
656 KMP_MB();
657 if ( --(lck->lk.depth_locked) == 0 ) {
658 __kmp_release_futex_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000659 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000660 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000661 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000662}
663
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000664static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000665__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
666{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000667 char const * const func = "omp_unset_nest_lock";
668 KMP_MB(); /* in case another processor initialized lock */
669 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
670 KMP_FATAL( LockSimpleUsedAsNestable, func );
671 }
672 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
673 KMP_FATAL( LockUnsettingFree, func );
674 }
675 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
676 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000677 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000678 return __kmp_release_nested_futex_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000679}
680
681void
682__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
683{
684 __kmp_init_futex_lock( lck );
685 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
686}
687
688static void
689__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
690{
691 __kmp_init_nested_futex_lock( lck );
692}
693
694void
695__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
696{
697 __kmp_destroy_futex_lock( lck );
698 lck->lk.depth_locked = 0;
699}
700
701static void
702__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
703{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000704 char const * const func = "omp_destroy_nest_lock";
705 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
706 KMP_FATAL( LockSimpleUsedAsNestable, func );
707 }
708 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
709 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000710 }
711 __kmp_destroy_nested_futex_lock( lck );
712}
713
Paul Osmialowskifb043fd2016-05-16 09:44:11 +0000714#endif // KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +0000715
716
717/* ------------------------------------------------------------------------ */
718/* ticket (bakery) locks */
719
720static kmp_int32
721__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
722{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000723 return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000724}
725
726static inline bool
727__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
728{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000729 return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000730}
731
732static kmp_uint32
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000733__kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000734{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000735 return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000736}
737
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000738__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000739__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
740{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000741 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000742
743#ifdef USE_LOCK_PROFILE
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000744 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
Jim Cownie5e8470a2013-09-27 10:38:44 +0000745 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
746 /* else __kmp_printf( "." );*/
747#endif /* USE_LOCK_PROFILE */
748
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000749 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000750 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000751 }
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000752 KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000753 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000754}
755
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000756int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000757__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
758{
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000759 return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000760}
761
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000762static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000763__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
764{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000765 char const * const func = "omp_set_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000766
767 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
768 KMP_FATAL( LockIsUninitialized, func );
769 }
770 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000771 KMP_FATAL( LockIsUninitialized, func );
772 }
773 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
774 KMP_FATAL( LockNestableUsedAsSimple, func );
775 }
776 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
777 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000778 }
779
780 __kmp_acquire_ticket_lock( lck, gtid );
781
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000782 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000783 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000784}
785
786int
787__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
788{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000789 kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );
790
791 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000792 kmp_uint32 next_ticket = my_ticket + 1;
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000793 if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
794 &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000795 return TRUE;
796 }
797 }
798 return FALSE;
799}
800
801static int
802__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
803{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000804 char const * const func = "omp_test_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000805
806 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
807 KMP_FATAL( LockIsUninitialized, func );
808 }
809 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000810 KMP_FATAL( LockIsUninitialized, func );
811 }
812 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
813 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000814 }
815
816 int retval = __kmp_test_ticket_lock( lck, gtid );
817
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000818 if ( retval ) {
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000819 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000820 }
821 return retval;
822}
823
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000824int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000825__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
826{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000827 kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000828
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000829 std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000830
831 KMP_YIELD( distance
832 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000833 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000834}
835
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000836static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000837__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
838{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000839 char const * const func = "omp_unset_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000840
841 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
842 KMP_FATAL( LockIsUninitialized, func );
843 }
844 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000845 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000846 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000847 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
848 KMP_FATAL( LockNestableUsedAsSimple, func );
849 }
850 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
851 KMP_FATAL( LockUnsettingFree, func );
852 }
853 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
854 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
855 KMP_FATAL( LockUnsettingSetByAnother, func );
856 }
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000857 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000858 return __kmp_release_ticket_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000859}
860
861void
862__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
863{
864 lck->lk.location = NULL;
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000865 lck->lk.self = lck;
866 std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
867 std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
868 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
869 std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
870 std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000871}
872
873static void
874__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
875{
876 __kmp_init_ticket_lock( lck );
877}
878
879void
880__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
881{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000882 std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
883 lck->lk.self = NULL;
884 lck->lk.location = NULL;
885 std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
886 std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
887 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
888 std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000889}
890
891static void
892__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
893{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000894 char const * const func = "omp_destroy_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000895
896 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
897 KMP_FATAL( LockIsUninitialized, func );
898 }
899 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000900 KMP_FATAL( LockIsUninitialized, func );
901 }
902 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903 KMP_FATAL( LockNestableUsedAsSimple, func );
904 }
905 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000907 }
908 __kmp_destroy_ticket_lock( lck );
909}
910
911
912//
913// nested ticket locks
914//
915
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000916int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000917__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918{
919 KMP_DEBUG_ASSERT( gtid >= 0 );
920
921 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000922 std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000923 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000924 }
925 else {
926 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000927 std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
928 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000929 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000930 }
931}
932
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000933static int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000934__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
935{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000936 char const * const func = "omp_set_nest_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000937
938 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
939 KMP_FATAL( LockIsUninitialized, func );
940 }
941 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000942 KMP_FATAL( LockIsUninitialized, func );
943 }
944 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
945 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000946 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +0000947 return __kmp_acquire_nested_ticket_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000948}
949
950int
951__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
952{
953 int retval;
954
955 KMP_DEBUG_ASSERT( gtid >= 0 );
956
957 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000958 retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000959 }
960 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
961 retval = 0;
962 }
963 else {
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000964 std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
965 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
966 retval = 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000967 }
968 return retval;
969}
970
971static int
972__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
973 kmp_int32 gtid )
974{
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000975 char const * const func = "omp_test_nest_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000976
977 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
978 KMP_FATAL( LockIsUninitialized, func );
979 }
980 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000981 KMP_FATAL( LockIsUninitialized, func );
982 }
983 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
984 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000985 }
986 return __kmp_test_nested_ticket_lock( lck, gtid );
987}
988
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000989int
Jim Cownie5e8470a2013-09-27 10:38:44 +0000990__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
991{
992 KMP_DEBUG_ASSERT( gtid >= 0 );
993
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +0000994 if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
995 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +0000996 __kmp_release_ticket_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000997 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000998 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +0000999 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001000}
1001
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001002static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001003__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1004{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001005 char const * const func = "omp_unset_nest_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001006
1007 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1008 KMP_FATAL( LockIsUninitialized, func );
1009 }
1010 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001011 KMP_FATAL( LockIsUninitialized, func );
1012 }
1013 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1014 KMP_FATAL( LockSimpleUsedAsNestable, func );
1015 }
1016 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1017 KMP_FATAL( LockUnsettingFree, func );
1018 }
1019 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1020 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001021 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001022 return __kmp_release_nested_ticket_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001023}
1024
1025void
1026__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1027{
1028 __kmp_init_ticket_lock( lck );
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001029 std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
Jim Cownie5e8470a2013-09-27 10:38:44 +00001030}
1031
1032static void
1033__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1034{
1035 __kmp_init_nested_ticket_lock( lck );
1036}
1037
1038void
1039__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1040{
1041 __kmp_destroy_ticket_lock( lck );
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001042 std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001043}
1044
1045static void
1046__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1047{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001048 char const * const func = "omp_destroy_nest_lock";
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001049
1050 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1051 KMP_FATAL( LockIsUninitialized, func );
1052 }
1053 if ( lck->lk.self != lck ) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001054 KMP_FATAL( LockIsUninitialized, func );
1055 }
1056 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1057 KMP_FATAL( LockSimpleUsedAsNestable, func );
1058 }
1059 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1060 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001061 }
1062 __kmp_destroy_nested_ticket_lock( lck );
1063}
1064
1065
1066//
1067// access functions to fields which don't exist for all lock kinds.
1068//
1069
1070static int
1071__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1072{
Paul Osmialowskif7cc6af2016-05-31 20:20:32 +00001073 return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001074}
1075
1076static const ident_t *
1077__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1078{
1079 return lck->lk.location;
1080}
1081
1082static void
1083__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1084{
1085 lck->lk.location = loc;
1086}
1087
1088static kmp_lock_flags_t
1089__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1090{
1091 return lck->lk.flags;
1092}
1093
1094static void
1095__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1096{
1097 lck->lk.flags = flags;
1098}
1099
1100/* ------------------------------------------------------------------------ */
1101/* queuing locks */
1102
1103/*
1104 * First the states
1105 * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1106 * UINT_MAX or -1, 0 means lock is held, nobody on queue
1107 * h, h means lock is held or about to transition, 1 element on queue
1108 * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1109 *
1110 * Now the transitions
1111 * Acquire(0,0) = -1 ,0
1112 * Release(0,0) = Error
1113 * Acquire(-1,0) = h ,h h > 0
1114 * Release(-1,0) = 0 ,0
1115 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1116 * Release(h,h) = -1 ,0 h > 0
1117 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1118 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1119 *
1120 * And pictorially
1121 *
1122 *
1123 * +-----+
1124 * | 0, 0|------- release -------> Error
1125 * +-----+
1126 * | ^
1127 * acquire| |release
1128 * | |
1129 * | |
1130 * v |
1131 * +-----+
1132 * |-1, 0|
1133 * +-----+
1134 * | ^
1135 * acquire| |release
1136 * | |
1137 * | |
1138 * v |
1139 * +-----+
1140 * | h, h|
1141 * +-----+
1142 * | ^
1143 * acquire| |release
1144 * | |
1145 * | |
1146 * v |
1147 * +-----+
1148 * | h, t|----- acquire, release loopback ---+
1149 * +-----+ |
1150 * ^ |
1151 * | |
1152 * +------------------------------------+
1153 *
1154 */
1155
1156#ifdef DEBUG_QUEUING_LOCKS
1157
1158/* Stuff for circular trace buffer */
1159#define TRACE_BUF_ELE 1024
1160static char traces[TRACE_BUF_ELE][128] = { 0 }
1161static int tc = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001162#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1163#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1164#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001165
1166static void
1167__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1168 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1169{
1170 kmp_int32 t, i;
1171
1172 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1173
1174 i = tc % TRACE_BUF_ELE;
1175 __kmp_printf_no_lock( "%s\n", traces[i] );
1176 i = (i+1) % TRACE_BUF_ELE;
1177 while ( i != (tc % TRACE_BUF_ELE) ) {
1178 __kmp_printf_no_lock( "%s", traces[i] );
1179 i = (i+1) % TRACE_BUF_ELE;
1180 }
1181 __kmp_printf_no_lock( "\n" );
1182
1183 __kmp_printf_no_lock(
1184 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1185 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1186 head_id, tail_id );
1187
1188 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1189
1190 if ( lck->lk.head_id >= 1 ) {
1191 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1192 while (t > 0) {
1193 __kmp_printf_no_lock( "-> %d ", t );
1194 t = __kmp_threads[t-1]->th.th_next_waiting;
1195 }
1196 }
1197 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1198 __kmp_printf_no_lock( "\n\n" );
1199}
1200
1201#endif /* DEBUG_QUEUING_LOCKS */
1202
1203static kmp_int32
1204__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1205{
1206 return TCR_4( lck->lk.owner_id ) - 1;
1207}
1208
1209static inline bool
1210__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1211{
1212 return lck->lk.depth_locked != -1;
1213}
1214
1215/* Acquire a lock using a the queuing lock implementation */
1216template <bool takeTime>
1217/* [TLW] The unused template above is left behind because of what BEB believes is a
1218 potential compiler problem with __forceinline. */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001219__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001220__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1221 kmp_int32 gtid )
1222{
1223 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1224 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1225 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1226 volatile kmp_uint32 *spin_here_p;
1227 kmp_int32 need_mf = 1;
1228
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001229#if OMPT_SUPPORT
1230 ompt_state_t prev_state = ompt_state_undefined;
1231#endif
1232
Jim Cownie5e8470a2013-09-27 10:38:44 +00001233 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1234
1235 KMP_FSYNC_PREPARE( lck );
1236 KMP_DEBUG_ASSERT( this_thr != NULL );
1237 spin_here_p = & this_thr->th.th_spin_here;
1238
1239#ifdef DEBUG_QUEUING_LOCKS
1240 TRACE_LOCK( gtid+1, "acq ent" );
1241 if ( *spin_here_p )
1242 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1243 if ( this_thr->th.th_next_waiting != 0 )
1244 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1245#endif
1246 KMP_DEBUG_ASSERT( !*spin_here_p );
1247 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1248
1249
1250 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1251 that may follow, not just in execution order, but also in visibility order. This way,
1252 when a releasing thread observes the changes to the queue by this thread, it can
1253 rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1254 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1255 to FALSE before this thread sets it to TRUE, this thread will hang.
1256 */
1257 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1258
1259 while( 1 ) {
1260 kmp_int32 enqueued;
1261 kmp_int32 head;
1262 kmp_int32 tail;
1263
1264 head = *head_id_p;
1265
1266 switch ( head ) {
1267
1268 case -1:
1269 {
1270#ifdef DEBUG_QUEUING_LOCKS
1271 tail = *tail_id_p;
1272 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1273#endif
1274 tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1275 this assignment prevents us from entering the if ( t > 0 )
1276 condition in the enqueued case below, which is not necessary for
1277 this state transition */
1278
1279 need_mf = 0;
1280 /* try (-1,0)->(tid,tid) */
1281 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1282 KMP_PACK_64( -1, 0 ),
1283 KMP_PACK_64( gtid+1, gtid+1 ) );
1284#ifdef DEBUG_QUEUING_LOCKS
1285 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1286#endif
1287 }
1288 break;
1289
1290 default:
1291 {
1292 tail = *tail_id_p;
1293 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1294
1295#ifdef DEBUG_QUEUING_LOCKS
1296 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1297#endif
1298
1299 if ( tail == 0 ) {
1300 enqueued = FALSE;
1301 }
1302 else {
1303 need_mf = 0;
1304 /* try (h,t) or (h,h)->(h,tid) */
1305 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1306
1307#ifdef DEBUG_QUEUING_LOCKS
1308 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1309#endif
1310 }
1311 }
1312 break;
1313
1314 case 0: /* empty queue */
1315 {
1316 kmp_int32 grabbed_lock;
1317
1318#ifdef DEBUG_QUEUING_LOCKS
1319 tail = *tail_id_p;
1320 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1321#endif
1322 /* try (0,0)->(-1,0) */
1323
1324 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1325 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1326
1327 if ( grabbed_lock ) {
1328
1329 *spin_here_p = FALSE;
1330
1331 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1332 lck, gtid ));
1333#ifdef DEBUG_QUEUING_LOCKS
1334 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1335#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001336
1337#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001338 if (ompt_enabled && prev_state != ompt_state_undefined) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001339 /* change the state before clearing wait_id */
1340 this_thr->th.ompt_thread_info.state = prev_state;
1341 this_thr->th.ompt_thread_info.wait_id = 0;
1342 }
1343#endif
1344
Jim Cownie5e8470a2013-09-27 10:38:44 +00001345 KMP_FSYNC_ACQUIRED( lck );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001346 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001347 }
1348 enqueued = FALSE;
1349 }
1350 break;
1351 }
1352
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001353#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001354 if (ompt_enabled && prev_state == ompt_state_undefined) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001355 /* this thread will spin; set wait_id before entering wait state */
1356 prev_state = this_thr->th.ompt_thread_info.state;
1357 this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1358 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1359 }
1360#endif
1361
Jim Cownie5e8470a2013-09-27 10:38:44 +00001362 if ( enqueued ) {
1363 if ( tail > 0 ) {
1364 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1365 KMP_ASSERT( tail_thr != NULL );
1366 tail_thr->th.th_next_waiting = gtid+1;
1367 /* corresponding wait for this write in release code */
1368 }
1369 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1370
1371
1372 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1373 * throughput only here.
1374 */
1375 KMP_MB();
1376 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1377
1378#ifdef DEBUG_QUEUING_LOCKS
1379 TRACE_LOCK( gtid+1, "acq spin" );
1380
1381 if ( this_thr->th.th_next_waiting != 0 )
1382 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1383#endif
1384 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1385 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1386 lck, gtid ));
1387
1388#ifdef DEBUG_QUEUING_LOCKS
1389 TRACE_LOCK( gtid+1, "acq exit 2" );
1390#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001391
1392#if OMPT_SUPPORT
1393 /* change the state before clearing wait_id */
1394 this_thr->th.ompt_thread_info.state = prev_state;
1395 this_thr->th.ompt_thread_info.wait_id = 0;
1396#endif
1397
Jim Cownie5e8470a2013-09-27 10:38:44 +00001398 /* got lock, we were dequeued by the thread that released lock */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001399 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001400 }
1401
1402 /* Yield if number of threads > number of logical processors */
1403 /* ToDo: Not sure why this should only be in oversubscription case,
1404 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1405 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1406 __kmp_xproc ) );
1407#ifdef DEBUG_QUEUING_LOCKS
1408 TRACE_LOCK( gtid+1, "acq retry" );
1409#endif
1410
1411 }
1412 KMP_ASSERT2( 0, "should not get here" );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001413 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414}
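//
// Illustrative walk-through of the enqueue path above; the gtid and state
// values are hypothetical. With the lock held and T#3, T#6 queued, the state
// is (head,tail) = (4,7) and T#3's th_next_waiting is 7. When T#9 arrives it
// CASes the tail from 7 to 10, giving (4,10), stores 10 into T#6's
// th_next_waiting (the write the release code waits for), and then spins on
// its own th_spin_here until the releasing thread dequeues it.
//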
1415
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001416int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001417__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1418{
1419 KMP_DEBUG_ASSERT( gtid >= 0 );
1420
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001421 return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001422}
1423
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001424static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001425__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1426 kmp_int32 gtid )
1427{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001428 char const * const func = "omp_set_lock";
1429 if ( lck->lk.initialized != lck ) {
1430 KMP_FATAL( LockIsUninitialized, func );
1431 }
1432 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1433 KMP_FATAL( LockNestableUsedAsSimple, func );
1434 }
1435 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1436 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001437 }
1438
1439 __kmp_acquire_queuing_lock( lck, gtid );
1440
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001441 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001442 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001443}
1444
1445int
1446__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1447{
1448 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1449 kmp_int32 head;
1450#ifdef KMP_DEBUG
1451 kmp_info_t *this_thr;
1452#endif
1453
1454 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1455 KMP_DEBUG_ASSERT( gtid >= 0 );
1456#ifdef KMP_DEBUG
1457 this_thr = __kmp_thread_from_gtid( gtid );
1458 KMP_DEBUG_ASSERT( this_thr != NULL );
1459 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1460#endif
1461
1462 head = *head_id_p;
1463
1464 if ( head == 0 ) { /* nobody on queue, nobody holding */
1465
1466 /* try (0,0)->(-1,0) */
1467
1468 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1469 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1470 KMP_FSYNC_ACQUIRED(lck);
1471 return TRUE;
1472 }
1473 }
1474
1475 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1476 return FALSE;
1477}
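//
// Note that this fast path only attempts the (0,0)->(-1,0) transition: if the
// lock is held or any thread is queued, the test returns FALSE immediately
// rather than enqueuing the caller.
//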
1478
1479static int
1480__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1481{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001482 char const * const func = "omp_test_lock";
1483 if ( lck->lk.initialized != lck ) {
1484 KMP_FATAL( LockIsUninitialized, func );
1485 }
1486 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1487 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 }
1489
1490 int retval = __kmp_test_queuing_lock( lck, gtid );
1491
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001492 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001493 lck->lk.owner_id = gtid + 1;
1494 }
1495 return retval;
1496}
1497
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001498int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001499__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1500{
1501 register kmp_info_t *this_thr;
1502 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1503 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1504
1505 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1506 KMP_DEBUG_ASSERT( gtid >= 0 );
1507 this_thr = __kmp_thread_from_gtid( gtid );
1508 KMP_DEBUG_ASSERT( this_thr != NULL );
1509#ifdef DEBUG_QUEUING_LOCKS
1510 TRACE_LOCK( gtid+1, "rel ent" );
1511
1512 if ( this_thr->th.th_spin_here )
1513 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1514 if ( this_thr->th.th_next_waiting != 0 )
1515 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1516#endif
1517 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1518 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1519
1520 KMP_FSYNC_RELEASING(lck);
1521
1522 while( 1 ) {
1523 kmp_int32 dequeued;
1524 kmp_int32 head;
1525 kmp_int32 tail;
1526
1527 head = *head_id_p;
1528
1529#ifdef DEBUG_QUEUING_LOCKS
1530 tail = *tail_id_p;
1531 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1532 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1533#endif
1534 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1535
1536 if ( head == -1 ) { /* nobody on queue */
1537
1538 /* try (-1,0)->(0,0) */
1539 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1540 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1541 lck, gtid ));
1542#ifdef DEBUG_QUEUING_LOCKS
1543 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1544#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001545
1546#if OMPT_SUPPORT
1547 /* nothing to do - no other thread is trying to shift blame */
1548#endif
1549
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001550 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001551 }
1552 dequeued = FALSE;
1553
1554 }
1555 else {
1556
1557 tail = *tail_id_p;
1558 if ( head == tail ) { /* only one thread on the queue */
1559
1560#ifdef DEBUG_QUEUING_LOCKS
1561 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1562#endif
1563 KMP_DEBUG_ASSERT( head > 0 );
1564
1565 /* try (h,h)->(-1,0) */
1566 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1567 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1568#ifdef DEBUG_QUEUING_LOCKS
1569 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1570#endif
1571
1572 }
1573 else {
1574 volatile kmp_int32 *waiting_id_p;
1575 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1576 KMP_DEBUG_ASSERT( head_thr != NULL );
1577 waiting_id_p = & head_thr->th.th_next_waiting;
1578
1579 /* Does this require synchronous reads? */
1580#ifdef DEBUG_QUEUING_LOCKS
1581 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1582#endif
1583 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1584
1585 /* try (h,t)->(h',t) or (t,t) */
1586
1587 KMP_MB();
1588 /* make sure enqueuing thread has time to update next waiting thread field */
Jonathan Peyton32a1ea12016-01-04 23:20:26 +00001589 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001590#ifdef DEBUG_QUEUING_LOCKS
1591 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1592#endif
1593 dequeued = TRUE;
1594 }
1595 }
1596
1597 if ( dequeued ) {
1598 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1599 KMP_DEBUG_ASSERT( head_thr != NULL );
1600
1601 /* Does this require synchronous reads? */
1602#ifdef DEBUG_QUEUING_LOCKS
1603 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1604#endif
1605 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1606
1607 /* For clean code only.
1608             * The waiting thread is not released until the next statement (resetting th_spin_here), which prevents a race with the acquire code.
1609 */
1610 head_thr->th.th_next_waiting = 0;
1611#ifdef DEBUG_QUEUING_LOCKS
1612 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1613#endif
1614
1615 KMP_MB();
1616 /* reset spin value */
1617 head_thr->th.th_spin_here = FALSE;
1618
1619 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1620 lck, gtid ));
1621#ifdef DEBUG_QUEUING_LOCKS
1622 TRACE_LOCK( gtid+1, "rel exit 2" );
1623#endif
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001624 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001625 }
1626 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1627
1628#ifdef DEBUG_QUEUING_LOCKS
1629 TRACE_LOCK( gtid+1, "rel retry" );
1630#endif
1631
1632 } /* while */
1633 KMP_ASSERT2( 0, "should not get here" );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001634 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001635}
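//
// Illustrative walk-through of the dequeue path above; the gtid and state
// values are hypothetical. With state (head,tail) = (4,10) and T#3 at the
// front of the queue, the releasing thread waits until T#3's th_next_waiting
// is non-zero, publishes that value (say 7) as the new head, then clears
// T#3's th_next_waiting and th_spin_here. T#3 drops out of its spin in the
// acquire path and becomes the owner, leaving the queue as (7,10).
//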
1636
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001637static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1639 kmp_int32 gtid )
1640{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001641 char const * const func = "omp_unset_lock";
1642 KMP_MB(); /* in case another processor initialized lock */
1643 if ( lck->lk.initialized != lck ) {
1644 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001645 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1647 KMP_FATAL( LockNestableUsedAsSimple, func );
1648 }
1649 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1650 KMP_FATAL( LockUnsettingFree, func );
1651 }
1652 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1653 KMP_FATAL( LockUnsettingSetByAnother, func );
1654 }
1655 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001656 return __kmp_release_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001657}
1658
1659void
1660__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1661{
1662 lck->lk.location = NULL;
1663 lck->lk.head_id = 0;
1664 lck->lk.tail_id = 0;
1665 lck->lk.next_ticket = 0;
1666 lck->lk.now_serving = 0;
1667 lck->lk.owner_id = 0; // no thread owns the lock.
1668 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1669 lck->lk.initialized = lck;
1670
1671 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1672}
1673
1674static void
1675__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1676{
1677 __kmp_init_queuing_lock( lck );
1678}
1679
1680void
1681__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1682{
1683 lck->lk.initialized = NULL;
1684 lck->lk.location = NULL;
1685 lck->lk.head_id = 0;
1686 lck->lk.tail_id = 0;
1687 lck->lk.next_ticket = 0;
1688 lck->lk.now_serving = 0;
1689 lck->lk.owner_id = 0;
1690 lck->lk.depth_locked = -1;
1691}
1692
1693static void
1694__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1695{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696 char const * const func = "omp_destroy_lock";
1697 if ( lck->lk.initialized != lck ) {
1698 KMP_FATAL( LockIsUninitialized, func );
1699 }
1700 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1701 KMP_FATAL( LockNestableUsedAsSimple, func );
1702 }
1703 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1704 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001705 }
1706 __kmp_destroy_queuing_lock( lck );
1707}
1708
1709
1710//
1711// nested queuing locks
1712//
1713
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001714int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001715__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716{
1717 KMP_DEBUG_ASSERT( gtid >= 0 );
1718
1719 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1720 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001721 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001722 }
1723 else {
1724 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1725 KMP_MB();
1726 lck->lk.depth_locked = 1;
1727 KMP_MB();
1728 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001729 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001730 }
1731}
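//
// Minimal usage sketch of the nesting counter (illustrative only; the gtid is
// hypothetical and T#2 does not yet own lck):
//
//   __kmp_acquire_nested_queuing_lock( lck, 2 ); // takes lock, depth = 1
//   __kmp_acquire_nested_queuing_lock( lck, 2 ); // owner re-enters, depth = 2
//   __kmp_release_nested_queuing_lock( lck, 2 ); // depth = 1, KMP_LOCK_STILL_HELD
//   __kmp_release_nested_queuing_lock( lck, 2 ); // depth = 0, lock released
//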
1732
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001733static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001734__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1735{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001736 char const * const func = "omp_set_nest_lock";
1737 if ( lck->lk.initialized != lck ) {
1738 KMP_FATAL( LockIsUninitialized, func );
1739 }
1740 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1741 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001743 return __kmp_acquire_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001744}
1745
1746int
1747__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1748{
1749 int retval;
1750
1751 KMP_DEBUG_ASSERT( gtid >= 0 );
1752
1753 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1754 retval = ++lck->lk.depth_locked;
1755 }
1756 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1757 retval = 0;
1758 }
1759 else {
1760 KMP_MB();
1761 retval = lck->lk.depth_locked = 1;
1762 KMP_MB();
1763 lck->lk.owner_id = gtid + 1;
1764 }
1765 return retval;
1766}
1767
1768static int
1769__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1770 kmp_int32 gtid )
1771{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001772 char const * const func = "omp_test_nest_lock";
1773 if ( lck->lk.initialized != lck ) {
1774 KMP_FATAL( LockIsUninitialized, func );
1775 }
1776 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1777 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001778 }
1779 return __kmp_test_nested_queuing_lock( lck, gtid );
1780}
1781
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001782int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001783__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1784{
1785 KMP_DEBUG_ASSERT( gtid >= 0 );
1786
1787 KMP_MB();
1788 if ( --(lck->lk.depth_locked) == 0 ) {
1789 KMP_MB();
1790 lck->lk.owner_id = 0;
1791 __kmp_release_queuing_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001792 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001793 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001794 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001795}
1796
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001797static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001798__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1799{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800 char const * const func = "omp_unset_nest_lock";
1801 KMP_MB(); /* in case another processor initialized lock */
1802 if ( lck->lk.initialized != lck ) {
1803 KMP_FATAL( LockIsUninitialized, func );
1804 }
1805 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1806 KMP_FATAL( LockSimpleUsedAsNestable, func );
1807 }
1808 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1809 KMP_FATAL( LockUnsettingFree, func );
1810 }
1811 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1812 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001813 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001814 return __kmp_release_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815}
1816
1817void
1818__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1819{
1820 __kmp_init_queuing_lock( lck );
1821 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1822}
1823
1824static void
1825__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1826{
1827 __kmp_init_nested_queuing_lock( lck );
1828}
1829
1830void
1831__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1832{
1833 __kmp_destroy_queuing_lock( lck );
1834 lck->lk.depth_locked = 0;
1835}
1836
1837static void
1838__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1839{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001840 char const * const func = "omp_destroy_nest_lock";
1841 if ( lck->lk.initialized != lck ) {
1842 KMP_FATAL( LockIsUninitialized, func );
1843 }
1844 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1845 KMP_FATAL( LockSimpleUsedAsNestable, func );
1846 }
1847 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1848 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001849 }
1850 __kmp_destroy_nested_queuing_lock( lck );
1851}
1852
1853
1854//
1855// access functions to fields which don't exist for all lock kinds.
1856//
1857
1858static int
1859__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1860{
1861 return lck == lck->lk.initialized;
1862}
1863
1864static const ident_t *
1865__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1866{
1867 return lck->lk.location;
1868}
1869
1870static void
1871__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1872{
1873 lck->lk.location = loc;
1874}
1875
1876static kmp_lock_flags_t
1877__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1878{
1879 return lck->lk.flags;
1880}
1881
1882static void
1883__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1884{
1885 lck->lk.flags = flags;
1886}
1887
1888#if KMP_USE_ADAPTIVE_LOCKS
1889
1890/*
1891 RTM Adaptive locks
1892*/
1893
Jonathan Peytona03533d2015-12-11 21:49:08 +00001894#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1895
1896#include <immintrin.h>
1897#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1898
1899#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001900
1901// Values from the status register after failed speculation.
1902#define _XBEGIN_STARTED (~0u)
1903#define _XABORT_EXPLICIT (1 << 0)
1904#define _XABORT_RETRY (1 << 1)
1905#define _XABORT_CONFLICT (1 << 2)
1906#define _XABORT_CAPACITY (1 << 3)
1907#define _XABORT_DEBUG (1 << 4)
1908#define _XABORT_NESTED (1 << 5)
1909#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1910
1911// Aborts for which it's worth trying again immediately
1912#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1913
1914#define STRINGIZE_INTERNAL(arg) #arg
1915#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1916
1917// Access to RTM instructions
1918
1919/*
1920 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1921 This is the same definition as the compiler intrinsic that will be supported at some point.
1922*/
1923static __inline int _xbegin()
1924{
1925 int res = -1;
1926
1927#if KMP_OS_WINDOWS
1928#if KMP_ARCH_X86_64
1929 _asm {
1930 _emit 0xC7
1931 _emit 0xF8
1932 _emit 2
1933 _emit 0
1934 _emit 0
1935 _emit 0
1936 jmp L2
1937 mov res, eax
1938 L2:
1939 }
1940#else /* IA32 */
1941 _asm {
1942 _emit 0xC7
1943 _emit 0xF8
1944 _emit 2
1945 _emit 0
1946 _emit 0
1947 _emit 0
1948 jmp L2
1949 mov res, eax
1950 L2:
1951 }
1952#endif // KMP_ARCH_X86_64
1953#else
1954    /* Note that %eax must be marked as killed (clobbered), because
1955 * the XSR is returned in %eax(%rax) on abort. Other register
1956 * values are restored, so don't need to be killed.
1957 *
1958 * We must also mark 'res' as an input and an output, since otherwise
1959 * 'res=-1' may be dropped as being dead, whereas we do need the
1960 * assignment on the successful (i.e., non-abort) path.
1961 */
1962 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1963 " .long 1f-1b-6\n"
1964 " jmp 2f\n"
1965 "1: movl %%eax,%0\n"
1966 "2:"
1967 :"+r"(res)::"memory","%eax");
1968#endif // KMP_OS_WINDOWS
1969 return res;
1970}
1971
1972/*
1973 Transaction end
1974*/
1975static __inline void _xend()
1976{
1977#if KMP_OS_WINDOWS
1978 __asm {
1979 _emit 0x0f
1980 _emit 0x01
1981 _emit 0xd5
1982 }
1983#else
1984 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1985#endif
1986}
1987
1988/*
1989    This is a macro; the argument must be a single-byte constant that
1990    can be evaluated by the inline assembler, since it is emitted as a
1991 byte into the assembly code.
1992*/
1993#if KMP_OS_WINDOWS
1994#define _xabort(ARG) \
1995 _asm _emit 0xc6 \
1996 _asm _emit 0xf8 \
1997 _asm _emit ARG
1998#else
1999#define _xabort(ARG) \
2000 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2001#endif
2002
Jonathan Peytona03533d2015-12-11 21:49:08 +00002003#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
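//
// Canonical usage of the primitives above, shown for reference only; this
// sketch is illustrative and is not itself part of the adaptive lock code
// below.
//
//   kmp_uint32 status = _xbegin();
//   if ( status == _XBEGIN_STARTED ) {
//       /* speculative region: memory accesses are tracked by the hardware */
//       _xend();                              // commit the transaction
//   } else if ( status & SOFT_ABORT_MASK ) {
//       /* transient abort: worth retrying immediately */
//   } else {
//       /* hard abort (e.g. capacity, nested): fall back to a real lock */
//   }
//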
2004
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005//
2006// Statistics are collected for testing purposes
2007//
2008#if KMP_DEBUG_ADAPTIVE_LOCKS
2009
2010// We accumulate speculative lock statistics when the lock is destroyed.
2011// We keep locks that haven't been destroyed in the liveLocks list
2012// so that we can grab their statistics too.
2013static kmp_adaptive_lock_statistics_t destroyedStats;
2014
2015// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002016static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002017
2018// A lock so we can safely update the list of locks.
2019static kmp_bootstrap_lock_t chain_lock;
2020
2021// Initialize the list of stats.
2022void
2023__kmp_init_speculative_stats()
2024{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002025 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002026
2027 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2028 lck->stats.next = lck;
2029 lck->stats.prev = lck;
2030
2031 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2032 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2033
2034 __kmp_init_bootstrap_lock( &chain_lock );
2035
2036}
2037
2038// Insert the lock into the circular list
2039static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041{
2042 __kmp_acquire_bootstrap_lock( &chain_lock );
2043
2044 lck->stats.next = liveLocks.stats.next;
2045 lck->stats.prev = &liveLocks;
2046
2047 liveLocks.stats.next = lck;
2048 lck->stats.next->stats.prev = lck;
2049
2050 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052
2053 __kmp_release_bootstrap_lock( &chain_lock );
2054}
2055
2056static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002058{
2059 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2060 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2061
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002062 kmp_adaptive_lock_info_t * n = lck->stats.next;
2063 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002064
2065 n->stats.prev = p;
2066 p->stats.next = n;
2067}
2068
2069static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002070__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002071{
2072 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2073 __kmp_remember_lock( lck );
2074}
2075
2076static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002077__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002078{
2079 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2080
2081 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2082 t->successfulSpeculations += s->successfulSpeculations;
2083 t->hardFailedSpeculations += s->hardFailedSpeculations;
2084 t->softFailedSpeculations += s->softFailedSpeculations;
2085 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2086 t->lemmingYields += s->lemmingYields;
2087}
2088
2089static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002090__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002091{
2092 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2093
2094 __kmp_acquire_bootstrap_lock( &chain_lock );
2095
2096 __kmp_add_stats( &destroyedStats, lck );
2097 __kmp_forget_lock( lck );
2098
2099 __kmp_release_bootstrap_lock( &chain_lock );
2100}
2101
2102static float
2103percent (kmp_uint32 count, kmp_uint32 total)
2104{
2105 return (total == 0) ? 0.0: (100.0 * count)/total;
2106}
2107
2108static
2109FILE * __kmp_open_stats_file()
2110{
2111 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2112 return stdout;
2113
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002114 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002115 char buffer[buffLen];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002116 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002117 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002118 FILE * result = fopen(&buffer[0], "w");
2119
2120 // Maybe we should issue a warning here...
2121 return result ? result : stdout;
2122}
2123
2124void
2125__kmp_print_speculative_stats()
2126{
2127 if (__kmp_user_lock_kind != lk_adaptive)
2128 return;
2129
2130 FILE * statsFile = __kmp_open_stats_file();
2131
2132 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002133 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134
2135 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2136 __kmp_add_stats( &total, lck );
2137 }
2138 kmp_adaptive_lock_statistics_t *t = &total;
2139 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2140 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2141 t->softFailedSpeculations;
2142
2143 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2144 fprintf ( statsFile, " Lock parameters: \n"
2145 " max_soft_retries : %10d\n"
2146 " max_badness : %10d\n",
2147 __kmp_adaptive_backoff_params.max_soft_retries,
2148 __kmp_adaptive_backoff_params.max_badness);
2149 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2150 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2151 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2152 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2153 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2154 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2155 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2156
2157 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2158 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2159 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2160 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2161 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2162 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2163 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2164
2165 if (statsFile != stdout)
2166 fclose( statsFile );
2167}
2168
2169# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2170#else
2171# define KMP_INC_STAT(lck,stat)
2172
2173#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2174
2175static inline bool
2176__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2177{
2178 // It is enough to check that the head_id is zero.
2179    // We don't need to check the tail as well.
2180 bool res = lck->lk.head_id == 0;
2181
2182 // We need a fence here, since we must ensure that no memory operations
2183 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002184#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002185 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002186#else
2187 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002188#endif
2189
2190 return res;
2191}
2192
2193// Functions for manipulating the badness
2194static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002195__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002196{
2197 // Reset the badness to zero so we eagerly try to speculate again
2198 lck->lk.adaptive.badness = 0;
2199 KMP_INC_STAT(lck,successfulSpeculations);
2200}
2201
2202// Create a bit mask with one more set bit.
2203static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002204__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002205{
2206 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2207 if ( newBadness > lck->lk.adaptive.max_badness) {
2208 return;
2209 } else {
2210 lck->lk.adaptive.badness = newBadness;
2211 }
2212}
2213
2214// Check whether speculation should be attempted.
2215static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002216__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002217{
2218 kmp_uint32 badness = lck->lk.adaptive.badness;
2219 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2220 int res = (attempts & badness) == 0;
2221 return res;
2222}
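//
// Worked example of the back-off above (values are illustrative): badness
// grows through 0, 1, 3, 7, ... (one more set bit per failed round of
// speculation), and speculation is attempted only when
// (acquire_attempts & badness) == 0. With badness == 3 speculation is tried
// on every 4th non-speculative acquire attempt; a successful speculation
// resets badness to 0, making the lock eagerly speculative again.
//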
2223
2224// Attempt to acquire only the speculative lock.
2225// Does not back off to the non-speculative lock.
2226//
2227static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002228__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002229{
2230 int retries = lck->lk.adaptive.max_soft_retries;
2231
2232 // We don't explicitly count the start of speculation, rather we record
2233 // the results (success, hard fail, soft fail). The sum of all of those
2234 // is the total number of times we started speculation since all
2235 // speculations must end one of those ways.
2236 do
2237 {
2238 kmp_uint32 status = _xbegin();
2239 // Switch this in to disable actual speculation but exercise
2240 // at least some of the rest of the code. Useful for debugging...
2241 // kmp_uint32 status = _XABORT_NESTED;
2242
2243 if (status == _XBEGIN_STARTED )
2244 { /* We have successfully started speculation
2245 * Check that no-one acquired the lock for real between when we last looked
2246 * and now. This also gets the lock cache line into our read-set,
2247 * which we need so that we'll abort if anyone later claims it for real.
2248 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002249 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002250 {
2251 // Lock is now visibly acquired, so someone beat us to it.
2252 // Abort the transaction so we'll restart from _xbegin with the
2253 // failure status.
Jonathan Peytona03533d2015-12-11 21:49:08 +00002254 _xabort(0x01);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255 KMP_ASSERT2( 0, "should not get here" );
2256 }
2257 return 1; // Lock has been acquired (speculatively)
2258 } else {
2259 // We have aborted, update the statistics
2260 if ( status & SOFT_ABORT_MASK)
2261 {
2262 KMP_INC_STAT(lck,softFailedSpeculations);
2263 // and loop round to retry.
2264 }
2265 else
2266 {
2267 KMP_INC_STAT(lck,hardFailedSpeculations);
2268 // Give up if we had a hard failure.
2269 break;
2270 }
2271 }
2272 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2273
2274 // Either we had a hard failure or we didn't succeed softly after
2275 // the full set of attempts, so back off the badness.
2276 __kmp_step_badness( lck );
2277 return 0;
2278}
2279
2280// Attempt to acquire the speculative lock, or back off to the non-speculative one
2281// if the speculative lock cannot be acquired.
2282// We can succeed speculatively, non-speculatively, or fail.
2283static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002284__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002285{
2286 // First try to acquire the lock speculatively
2287 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2288 return 1;
2289
2290 // Speculative acquisition failed, so try to acquire it non-speculatively.
2291 // Count the non-speculative acquire attempt
2292 lck->lk.adaptive.acquire_attempts++;
2293
2294 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002295 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002296 {
2297 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2298 return 1; // Lock is acquired (non-speculatively)
2299 }
2300 else
2301 {
2302 return 0; // Failed to acquire the lock, it's already visibly locked.
2303 }
2304}
2305
2306static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002307__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002309 char const * const func = "omp_test_lock";
2310 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2311 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002312 }
2313
2314 int retval = __kmp_test_adaptive_lock( lck, gtid );
2315
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002316 if ( retval ) {
2317 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002318 }
2319 return retval;
2320}
2321
2322// Block until we can acquire a speculative, adaptive lock.
2323// We check whether we should be trying to speculate.
2324// If we should be, we check the real lock to see if it is free,
2325// and, if not, pause without attempting to acquire it until it is.
2326// Then we try the speculative acquire.
2327// This means that although we suffer from lemmings a little (because
2328// we can't acquire the lock speculatively until the queue of waiting
2329// threads has cleared), we don't get into a
2330// state where we can never acquire the lock speculatively (because we
2331// force the queue to clear by preventing new arrivals from entering the
2332// queue).
2333// This does mean that when we're trying to break lemmings, the lock
2334// is no longer fair. However OpenMP makes no guarantee that its
2335// locks are fair, so this isn't a real problem.
2336static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002337__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002338{
2339 if ( __kmp_should_speculate( lck, gtid ) )
2340 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002341 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002342 {
2343 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2344 return;
2345 // We tried speculation and failed, so give up.
2346 }
2347 else
2348 {
2349 // We can't try speculation until the lock is free, so we
2350            // pause here (without suspending on the queuing lock)
2351            // to allow it to drain, then try again.
2352 // All other threads will also see the same result for
2353 // shouldSpeculate, so will be doing the same if they
2354 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002355 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002356 {
2357 KMP_INC_STAT(lck,lemmingYields);
2358 __kmp_yield (TRUE);
2359 }
2360
2361 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2362 return;
2363 }
2364 }
2365
2366 // Speculative acquisition failed, so acquire it non-speculatively.
2367 // Count the non-speculative acquire attempt
2368 lck->lk.adaptive.acquire_attempts++;
2369
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002370 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002371 // We have acquired the base lock, so count that.
2372 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2373}
2374
2375static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002378 char const * const func = "omp_set_lock";
2379 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2380 KMP_FATAL( LockIsUninitialized, func );
2381 }
2382 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2383 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002384 }
2385
2386 __kmp_acquire_adaptive_lock( lck, gtid );
2387
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002388 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002389}
2390
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002391static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002392__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002393{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002394 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002395 { // If the lock doesn't look claimed we must be speculating.
2396 // (Or the user's code is buggy and they're releasing without locking;
2397 // if we had XTEST we'd be able to check that case...)
2398 _xend(); // Exit speculation
2399 __kmp_update_badness_after_success( lck );
2400 }
2401 else
2402 { // Since the lock *is* visibly locked we're not speculating,
2403 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002404 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002405 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002406 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002407}
2408
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002409static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002410__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002411{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002412 char const * const func = "omp_unset_lock";
2413 KMP_MB(); /* in case another processor initialized lock */
2414 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2415 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002417 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2418 KMP_FATAL( LockUnsettingFree, func );
2419 }
2420 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2421 KMP_FATAL( LockUnsettingSetByAnother, func );
2422 }
2423 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002424 __kmp_release_adaptive_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002425 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002426}
2427
2428static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002429__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002430{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002431 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002432 lck->lk.adaptive.badness = 0;
2433 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2434 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2435 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2436#if KMP_DEBUG_ADAPTIVE_LOCKS
2437 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2438#endif
2439 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2440}
2441
2442static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002443__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444{
2445 __kmp_init_adaptive_lock( lck );
2446}
2447
2448static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002449__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450{
2451#if KMP_DEBUG_ADAPTIVE_LOCKS
2452 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2453#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002454 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002455 // Nothing needed for the speculative part.
2456}
2457
2458static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002459__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002460{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002461 char const * const func = "omp_destroy_lock";
2462 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2463 KMP_FATAL( LockIsUninitialized, func );
2464 }
2465 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2466 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002467 }
2468 __kmp_destroy_adaptive_lock( lck );
2469}
2470
2471
2472#endif // KMP_USE_ADAPTIVE_LOCKS
2473
2474
2475/* ------------------------------------------------------------------------ */
2476/* DRDPA ticket locks */
2477/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2478
2479static kmp_int32
2480__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2481{
2482 return TCR_4( lck->lk.owner_id ) - 1;
2483}
2484
2485static inline bool
2486__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2487{
2488 return lck->lk.depth_locked != -1;
2489}
2490
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002491__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002492__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2493{
2494 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2495 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2496 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2497 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2498 TCR_PTR(lck->lk.polls); // volatile load
2499
2500#ifdef USE_LOCK_PROFILE
2501 if (TCR_8(polls[ticket & mask].poll) != ticket)
2502 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2503 /* else __kmp_printf( "." );*/
2504#endif /* USE_LOCK_PROFILE */
2505
2506 //
2507 // Now spin-wait, but reload the polls pointer and mask, in case the
2508 // polling area has been reconfigured. Unless it is reconfigured, the
2509 // reloads stay in L1 cache and are cheap.
2510 //
2511 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2512 //
2513 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2514 // and poll to be re-read every spin iteration.
2515 //
2516 kmp_uint32 spins;
2517
2518 KMP_FSYNC_PREPARE(lck);
2519 KMP_INIT_YIELD(spins);
Paul Osmialowski52bef532016-05-07 00:00:00 +00002520 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002521 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002522 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002523 // CPU Pause is in the macros for yield.
2524 //
2525 KMP_YIELD(TCR_4(__kmp_nth)
2526 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2527 KMP_YIELD_SPIN(spins);
2528
2529 // Re-read the mask and the poll pointer from the lock structure.
2530 //
2531 // Make certain that "mask" is read before "polls" !!!
2532 //
2533        // If another thread reconfigures the polling area and updates
2534        // these values, and we get the new value of mask and the old polls
2535 // pointer, we could access memory beyond the end of the old polling
2536 // area.
2537 //
2538 mask = TCR_8(lck->lk.mask); // volatile load
2539 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2540 TCR_PTR(lck->lk.polls); // volatile load
2541 }
2542
2543 //
2544 // Critical section starts here
2545 //
2546 KMP_FSYNC_ACQUIRED(lck);
2547 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2548 ticket, lck));
2549 lck->lk.now_serving = ticket; // non-volatile store
2550
2551 //
2552 // Deallocate a garbage polling area if we know that we are the last
2553 // thread that could possibly access it.
2554 //
2555 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2556 // ticket.
2557 //
2558 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2559 __kmp_free((void *)lck->lk.old_polls);
2560 lck->lk.old_polls = NULL;
2561 lck->lk.cleanup_ticket = 0;
2562 }
2563
2564 //
2565 // Check to see if we should reconfigure the polling area.
2566 // If there is still a garbage polling area to be deallocated from a
2567 // previous reconfiguration, let a later thread reconfigure it.
2568 //
2569 if (lck->lk.old_polls == NULL) {
2570 bool reconfigure = false;
2571 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2572 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2573
2574 if (TCR_4(__kmp_nth)
2575 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2576 //
2577 // We are in oversubscription mode. Contract the polling area
2578 // down to a single location, if that hasn't been done already.
2579 //
2580 if (num_polls > 1) {
2581 reconfigure = true;
2582 num_polls = TCR_4(lck->lk.num_polls);
2583 mask = 0;
2584 num_polls = 1;
2585 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2586 __kmp_allocate(num_polls * sizeof(*polls));
2587 polls[0].poll = ticket;
2588 }
2589 }
2590 else {
2591 //
2592 // We are in under/fully subscribed mode. Check the number of
2593 // threads waiting on the lock. The size of the polling area
2594 // should be at least the number of threads waiting.
2595 //
2596 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2597 if (num_waiting > num_polls) {
2598 kmp_uint32 old_num_polls = num_polls;
2599 reconfigure = true;
2600 do {
2601 mask = (mask << 1) | 1;
2602 num_polls *= 2;
2603 } while (num_polls <= num_waiting);
2604
2605 //
2606 // Allocate the new polling area, and copy the relevant portion
2607 // of the old polling area to the new area. __kmp_allocate()
2608 // zeroes the memory it allocates, and most of the old area is
2609 // just zero padding, so we only copy the release counters.
2610 //
2611 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2612 __kmp_allocate(num_polls * sizeof(*polls));
2613 kmp_uint32 i;
2614 for (i = 0; i < old_num_polls; i++) {
2615 polls[i].poll = old_polls[i].poll;
2616 }
2617 }
2618 }
2619
2620 if (reconfigure) {
2621 //
2622 // Now write the updated fields back to the lock structure.
2623 //
2624 // Make certain that "polls" is written before "mask" !!!
2625 //
2626 // If another thread picks up the new value of mask and the old
2627        // polls pointer, it could access memory beyond the end of the
2628 // old polling area.
2629 //
2630 // On x86, we need memory fences.
2631 //
2632 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2633 ticket, lck, num_polls));
2634
2635 lck->lk.old_polls = old_polls; // non-volatile store
2636 lck->lk.polls = polls; // volatile store
2637
2638 KMP_MB();
2639
2640 lck->lk.num_polls = num_polls; // non-volatile store
2641 lck->lk.mask = mask; // volatile store
2642
2643 KMP_MB();
2644
2645 //
2646 // Only after the new polling area and mask have been flushed
2647 // to main memory can we update the cleanup ticket field.
2648 //
2649 // volatile load / non-volatile store
2650 //
2651 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2652 }
2653 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002654 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002655}
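//
// Worked example of the ticket/polling-area relationship above (values are
// illustrative): mask is always num_polls - 1, so with num_polls == 4 and
// mask == 3 a thread holding ticket 9 spins on polls[9 & 3] = polls[1] until
// that location reaches 9. The owner (ticket 8) releases by storing 9 into
// polls[1] (see __kmp_release_drdpa_lock below), so waiters poll mostly
// distinct locations and a reconfiguration only needs to copy the release
// counters into the larger area.
//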
2656
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002657int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002658__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2659{
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002660 return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002661}
2662
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002663static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002664__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2665{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002666 char const * const func = "omp_set_lock";
2667 if ( lck->lk.initialized != lck ) {
2668 KMP_FATAL( LockIsUninitialized, func );
2669 }
2670 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2671 KMP_FATAL( LockNestableUsedAsSimple, func );
2672 }
2673 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2674 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675 }
2676
2677 __kmp_acquire_drdpa_lock( lck, gtid );
2678
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002679 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002680 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002681}
2682
2683int
2684__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2685{
2686 //
2687 // First get a ticket, then read the polls pointer and the mask.
2688 // The polls pointer must be read before the mask!!! (See above)
2689 //
2690 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2691 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2692 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2693 TCR_PTR(lck->lk.polls); // volatile load
2694 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2695 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2696 kmp_uint64 next_ticket = ticket + 1;
2697 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2698 ticket, next_ticket)) {
2699 KMP_FSYNC_ACQUIRED(lck);
2700 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2701 ticket, lck));
2702 lck->lk.now_serving = ticket; // non-volatile store
2703
2704 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002705 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002706 // we would want to reconfigure the polling area. We might
2707 // have the cleanup ticket value (which says that it is now
2708 // safe to deallocate old_polls), but we'll let a later thread
2709 // which calls __kmp_acquire_lock do that - this routine
2710 // isn't supposed to block, and we would risk blocks if we
2711 // called __kmp_free() to do the deallocation.
2712 //
2713 return TRUE;
2714 }
2715 }
2716 return FALSE;
2717}
2718
2719static int
2720__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2721{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002722 char const * const func = "omp_test_lock";
2723 if ( lck->lk.initialized != lck ) {
2724 KMP_FATAL( LockIsUninitialized, func );
2725 }
2726 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2727 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728 }
2729
2730 int retval = __kmp_test_drdpa_lock( lck, gtid );
2731
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002732 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733 lck->lk.owner_id = gtid + 1;
2734 }
2735 return retval;
2736}
2737
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002738int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002739__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2740{
2741 //
2742 // Read the ticket value from the lock data struct, then the polls
2743 // pointer and the mask. The polls pointer must be read before the
2744 // mask!!! (See above)
2745 //
2746 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2747 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2748 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2749 TCR_PTR(lck->lk.polls); // volatile load
2750 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2751 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2752 ticket - 1, lck));
2753 KMP_FSYNC_RELEASING(lck);
2754 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002755 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002756}
2757
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002758static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002759__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2760{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002761 char const * const func = "omp_unset_lock";
2762 KMP_MB(); /* in case another processor initialized lock */
2763 if ( lck->lk.initialized != lck ) {
2764 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002765 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002766 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2767 KMP_FATAL( LockNestableUsedAsSimple, func );
2768 }
2769 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2770 KMP_FATAL( LockUnsettingFree, func );
2771 }
2772 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2773 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2774 KMP_FATAL( LockUnsettingSetByAnother, func );
2775 }
2776 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002777 return __kmp_release_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002778}
2779
2780void
2781__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2782{
2783 lck->lk.location = NULL;
2784 lck->lk.mask = 0;
2785 lck->lk.num_polls = 1;
2786 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2787 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2788 lck->lk.cleanup_ticket = 0;
2789 lck->lk.old_polls = NULL;
2790 lck->lk.next_ticket = 0;
2791 lck->lk.now_serving = 0;
2792 lck->lk.owner_id = 0; // no thread owns the lock.
2793 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2794 lck->lk.initialized = lck;
2795
2796 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2797}
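// With num_polls == 1 and mask == 0 every ticket initially maps to polls[0];
// the polling area is only enlarged later, by the acquire path, once it sees
// more waiters than polling locations.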
2798
2799static void
2800__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2801{
2802 __kmp_init_drdpa_lock( lck );
2803}
2804
2805void
2806__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2807{
2808 lck->lk.initialized = NULL;
2809 lck->lk.location = NULL;
2810 if (lck->lk.polls != NULL) {
2811 __kmp_free((void *)lck->lk.polls);
2812 lck->lk.polls = NULL;
2813 }
2814 if (lck->lk.old_polls != NULL) {
2815 __kmp_free((void *)lck->lk.old_polls);
2816 lck->lk.old_polls = NULL;
2817 }
2818 lck->lk.mask = 0;
2819 lck->lk.num_polls = 0;
2820 lck->lk.cleanup_ticket = 0;
2821 lck->lk.next_ticket = 0;
2822 lck->lk.now_serving = 0;
2823 lck->lk.owner_id = 0;
2824 lck->lk.depth_locked = -1;
2825}
2826
2827static void
2828__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2829{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002830 char const * const func = "omp_destroy_lock";
2831 if ( lck->lk.initialized != lck ) {
2832 KMP_FATAL( LockIsUninitialized, func );
2833 }
2834 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2835 KMP_FATAL( LockNestableUsedAsSimple, func );
2836 }
2837 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2838 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002839 }
2840 __kmp_destroy_drdpa_lock( lck );
2841}
2842
2843
2844//
2845// nested drdpa ticket locks
2846//
2847
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002848int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002849__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2850{
2851 KMP_DEBUG_ASSERT( gtid >= 0 );
2852
2853 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2854 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002855 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002856 }
2857 else {
2858 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2859 KMP_MB();
2860 lck->lk.depth_locked = 1;
2861 KMP_MB();
2862 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002863 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002864 }
2865}
2866
2867static void
2868__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2869{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002870 char const * const func = "omp_set_nest_lock";
2871 if ( lck->lk.initialized != lck ) {
2872 KMP_FATAL( LockIsUninitialized, func );
2873 }
2874 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2875 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002876 }
2877 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2878}
2879
2880int
2881__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2882{
2883 int retval;
2884
2885 KMP_DEBUG_ASSERT( gtid >= 0 );
2886
2887 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2888 retval = ++lck->lk.depth_locked;
2889 }
2890 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2891 retval = 0;
2892 }
2893 else {
2894 KMP_MB();
2895 retval = lck->lk.depth_locked = 1;
2896 KMP_MB();
2897 lck->lk.owner_id = gtid + 1;
2898 }
2899 return retval;
2900}
2901
2902static int
2903__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2904{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002905 char const * const func = "omp_test_nest_lock";
2906 if ( lck->lk.initialized != lck ) {
2907 KMP_FATAL( LockIsUninitialized, func );
2908 }
2909 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2910 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002911 }
2912 return __kmp_test_nested_drdpa_lock( lck, gtid );
2913}
2914
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002915int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002916__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2917{
2918 KMP_DEBUG_ASSERT( gtid >= 0 );
2919
2920 KMP_MB();
2921 if ( --(lck->lk.depth_locked) == 0 ) {
2922 KMP_MB();
2923 lck->lk.owner_id = 0;
2924 __kmp_release_drdpa_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002925 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002926 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002927 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002928}
2929
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002930static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002931__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2932{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002933 char const * const func = "omp_unset_nest_lock";
2934 KMP_MB(); /* in case another processor initialized lock */
2935 if ( lck->lk.initialized != lck ) {
2936 KMP_FATAL( LockIsUninitialized, func );
2937 }
2938 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2939 KMP_FATAL( LockSimpleUsedAsNestable, func );
2940 }
2941 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2942 KMP_FATAL( LockUnsettingFree, func );
2943 }
2944 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2945 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002946 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002947 return __kmp_release_nested_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948}
2949
2950void
2951__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2952{
2953 __kmp_init_drdpa_lock( lck );
2954 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2955}
2956
2957static void
2958__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2959{
2960 __kmp_init_nested_drdpa_lock( lck );
2961}
2962
2963void
2964__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2965{
2966 __kmp_destroy_drdpa_lock( lck );
2967 lck->lk.depth_locked = 0;
2968}
2969
2970static void
2971__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2972{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002973 char const * const func = "omp_destroy_nest_lock";
2974 if ( lck->lk.initialized != lck ) {
2975 KMP_FATAL( LockIsUninitialized, func );
2976 }
2977 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2978 KMP_FATAL( LockSimpleUsedAsNestable, func );
2979 }
2980 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2981 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002982 }
2983 __kmp_destroy_nested_drdpa_lock( lck );
2984}
2985
2986
2987//
2988// access functions to fields which don't exist for all lock kinds.
2989//
2990
2991static int
2992__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2993{
2994 return lck == lck->lk.initialized;
2995}
2996
2997static const ident_t *
2998__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2999{
3000 return lck->lk.location;
3001}
3002
3003static void
3004__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3005{
3006 lck->lk.location = loc;
3007}
3008
3009static kmp_lock_flags_t
3010__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3011{
3012 return lck->lk.flags;
3013}
3014
3015static void
3016__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3017{
3018 lck->lk.flags = flags;
3019}
3020
Jonathan Peyton377aa402016-04-14 16:00:37 +00003021// Time stamp counter
3022#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3023# define __kmp_tsc() __kmp_hardware_timestamp()
3024// Runtime's default backoff parameters
3025kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3026#else
3027// Use nanoseconds for other platforms
3028extern kmp_uint64 __kmp_now_nsec();
3029kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3030# define __kmp_tsc() __kmp_now_nsec()
3031#endif
3032
3033// A useful predicate for dealing with timestamps that may wrap.
3034// Is a before b?
3035// Since the timestamps may wrap, this is asking whether it's
3036// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3037// Times where going clockwise is less distance than going anti-clockwise
3038// are in the future, others are in the past.
3039// e.g. a = MAX-1, b = MAX+1 (== 0): the unsigned comparison a > b is true, yet b is the later timestamp;
3040// the signed view (a == -2, b == 0) gives b - a == 2 > 0, which captures the actual ordering.
3041static inline bool before(kmp_uint64 a, kmp_uint64 b)
3042{
3043 return ((kmp_int64)b - (kmp_int64)a) > 0;
3044}
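// A minimal sketch of how before() behaves, assuming 64-bit counters as above:
//
//   before(10, 20)                  // true:  20 - 10 == 10 > 0
//   before(20, 10)                  // false: 10 - 20 == -10
//   before(~(kmp_uint64)0 - 1, 1)   // true:  signed a == -2, b == 1, so b - a == 3
//
// A timestamp that has just wrapped past zero therefore still compares as
// "after" one taken shortly before the wrap, which is what the spin-backoff
// loop below relies on.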
3045
3046// Truncated binary exponential backoff function
3047void
3048__kmp_spin_backoff(kmp_backoff_t *boff)
3049{
3050 // We could flatten this loop, but making it a nested loop gives better results.
3051 kmp_uint32 i;
3052 for (i = boff->step; i > 0; i--) {
3053 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3054 do {
3055 KMP_CPU_PAUSE();
3056 } while (before(__kmp_tsc(), goal));
3057 }
3058 boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3059}
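// A minimal sketch of the backoff growth, assuming the default parameters
// above are {step, max_backoff, min_tick} in that order.  The update
// (step << 1 | 1) & (max_backoff - 1) walks 1, 3, 7, 15, ... and saturates at
// max_backoff - 1, so max_backoff is expected to be a power of two for the
// mask to act as a cap.
//
//   kmp_backoff_t boff = __kmp_spin_backoff_params;  // step == 1
//   __kmp_spin_backoff(&boff);   // waits ~1 * min_tick, step becomes 3
//   __kmp_spin_backoff(&boff);   // waits ~3 * min_tick, step becomes 7
//   __kmp_spin_backoff(&boff);   // waits ~7 * min_tick, step becomes 15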
3060
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003061#if KMP_USE_DYNAMIC_LOCK
3062
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003063// Direct lock initializer. It simply writes a tag to the low 8 bits of the lock word.
Jonathan Peytona03533d2015-12-11 21:49:08 +00003064static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3065{
3066 TCW_4(*lck, KMP_GET_D_TAG(seq));
3067 KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003068}
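// A minimal sketch of the resulting lock word, assuming KMP_GET_D_TAG() yields
// an odd value (the jump tables below only fill the odd entries):
//
//   kmp_dyna_lock_t word;
//   __kmp_init_direct_lock(&word, lockseq_tas);
//   // (word & 0xff) is now the odd tag for "tas"; the remaining bits are
//   // used by the lock implementation itself once the lock is taken.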
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003069
Jonathan Peytondae13d82015-12-11 21:57:06 +00003070#if KMP_USE_TSX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003071
3072// HLE lock functions - imported from the testbed runtime.
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003073#define HLE_ACQUIRE ".byte 0xf2;"
3074#define HLE_RELEASE ".byte 0xf3;"
3075
3076static inline kmp_uint32
3077swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3078{
3079 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3080 : "+r"(v), "+m"(*p)
3081 :
3082 : "memory");
3083 return v;
3084}
3085
3086static void
3087__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3088{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003089 TCW_4(*lck, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003090}
3091
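// The acquire path below is a test-and-test-and-set loop: the first swap4()
// attempts the HLE-elided exchange, and while the lock looks busy the thread
// spins on a plain read (cheaper than repeated atomic exchanges) with a short
// bounded pause (delay cycles through 1, 3 and then stays at 7) before
// retrying the elided exchange.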
3092static void
3093__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3094{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003095 // Use gtid for KMP_LOCK_BUSY if necessary
3096 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003097 int delay = 1;
3098 do {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003099 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003100 for (int i = delay; i != 0; --i)
Jonathan Peytona03533d2015-12-11 21:49:08 +00003101 KMP_CPU_PAUSE();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003102 delay = ((delay << 1) | 1) & 7;
3103 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003104 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003105 }
3106}
3107
3108static void
3109__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3110{
3111 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3112}
3113
Jonathan Peytona03533d2015-12-11 21:49:08 +00003114static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003115__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3116{
3117 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3118 : "=m"(*lck)
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003119 : "r"(KMP_LOCK_FREE(hle))
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003120 : "memory");
Jonathan Peytona03533d2015-12-11 21:49:08 +00003121 return KMP_LOCK_RELEASED;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003122}
3123
Jonathan Peytona03533d2015-12-11 21:49:08 +00003124static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003125__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3126{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003127 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003128}
3129
3130static int
3131__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3132{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003133 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003134}
3135
3136static int
3137__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3138{
3139 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3140}
3141
Jonathan Peytondae13d82015-12-11 21:57:06 +00003142static void
3143__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3144{
3145 __kmp_init_queuing_lock(lck);
3146}
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003147
Jonathan Peytondae13d82015-12-11 21:57:06 +00003148static void
3149__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3150{
3151 __kmp_destroy_queuing_lock(lck);
3152}
3153
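// The acquire path below tries to elide the lock inside an RTM transaction.
// If the transaction starts but the lock is already held, it aborts explicitly
// with code 0xff; the abort handler recognizes that code and waits for the
// lock to become free outside of a transaction before retrying, instead of
// repeatedly starting transactions that are doomed to abort.  Once the retries
// are exhausted, or on a non-retryable abort, it falls back to acquiring the
// underlying queuing lock for real.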
3154static void
3155__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3156{
3157 unsigned retries=3, status;
3158 do {
3159 status = _xbegin();
3160 if (status == _XBEGIN_STARTED) {
3161 if (__kmp_is_unlocked_queuing_lock(lck))
3162 return;
3163 _xabort(0xff);
3164 }
3165 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3166 // Wait until lock becomes free
3167 while (! __kmp_is_unlocked_queuing_lock(lck))
3168 __kmp_yield(TRUE);
3169 }
3170 else if (!(status & _XABORT_RETRY))
3171 break;
3172 } while (retries--);
3173
3174 // Fall-back non-speculative lock (xchg)
3175 __kmp_acquire_queuing_lock(lck, gtid);
3176}
3177
3178static void
3179__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180{
3181 __kmp_acquire_rtm_lock(lck, gtid);
3182}
3183
3184static int
3185__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3186{
3187 if (__kmp_is_unlocked_queuing_lock(lck)) {
3188 // Releasing from speculation
3189 _xend();
3190 }
3191 else {
3192 // Releasing from a real lock
3193 __kmp_release_queuing_lock(lck, gtid);
3194 }
3195 return KMP_LOCK_RELEASED;
3196}
3197
3198static int
3199__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3200{
3201 return __kmp_release_rtm_lock(lck, gtid);
3202}
3203
3204static int
3205__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3206{
3207 unsigned retries=3, status;
3208 do {
3209 status = _xbegin();
3210 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3211 return 1;
3212 }
3213 if (!(status & _XABORT_RETRY))
3214 break;
3215 } while (retries--);
3216
3217 return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3218}
3219
3220static int
3221__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3222{
3223 return __kmp_test_rtm_lock(lck, gtid);
3224}
3225
3226#endif // KMP_USE_TSX
3227
3228// Entry functions for indirect locks (first element of direct lock jump tables).
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003229static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3230static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3231static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003232static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003233static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3234static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003235static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003236static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3237
3238//
3239// Jump tables for the indirect lock functions.
3240// Only fill in the odd entries; that avoids the need to shift out the low bit.
3241//
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003242
Jonathan Peytona03533d2015-12-11 21:49:08 +00003243// init functions
3244#define expand(l, op) 0,__kmp_init_direct_lock,
3245void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3246 = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3247#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003248
Jonathan Peytona03533d2015-12-11 21:49:08 +00003249// destroy functions
3250#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3251void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3252 = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3253#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003254
Jonathan Peytona03533d2015-12-11 21:49:08 +00003255// set/acquire functions
3256#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3257static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3258 = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3259#undef expand
3260#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3261static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3262 = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3263#undef expand
3264
3265// unset/release and test functions
3266#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3267static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3268 = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3269static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3270 = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3271#undef expand
3272#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3273static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3274 = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3275static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3276 = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3277#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003278
3279// Exposes only one set of jump tables (*lock or *lock_with_checks).
Jonathan Peytona03533d2015-12-11 21:49:08 +00003280void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3281int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3282int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
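// A minimal sketch of how a call site consults these tables, where lck and
// gtid stand for the caller's lock word (kmp_dyna_lock_t *) and global thread
// id, and assuming the tag occupies the low 8 bits as described above (an even
// word, i.e. an indirect lock, routes to entry 0):
//
//   kmp_uint32 w   = *(kmp_uint32 *)lck;
//   kmp_uint32 tag = (w & 1) ? (w & 0xff) : 0;   // even word => indirect => entry 0
//   __kmp_direct_set[tag](lck, gtid);            // acquire through the bound table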
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003283
3284//
3285// Jump tables for the indirect lock functions.
3286//
Jonathan Peytona03533d2015-12-11 21:49:08 +00003287#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3288void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3289void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3290#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003291
Jonathan Peytona03533d2015-12-11 21:49:08 +00003292// set/acquire functions
3293#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3294static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3295#undef expand
3296#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3297static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3298#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003299
Jonathan Peytona03533d2015-12-11 21:49:08 +00003300// unset/release and test functions
3301#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3302static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3303static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3304#undef expand
3305#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3306static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3307static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3308#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003309
Jonathan Peytona03533d2015-12-11 21:49:08 +00003310// Exposes only one set of jump tables (*lock or *lock_with_checks).
3311void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3312int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3313int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003314
3315// Lock index table.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003316kmp_indirect_lock_table_t __kmp_i_lock_table;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003317
3318// Size of indirect locks.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003319static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003320
3321// Jump tables for lock accessor/modifier.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003322void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3323void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3324const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3325kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003326
3327// Use different lock pools for different lock types.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003328static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003329
Jonathan Peytondae13d82015-12-11 21:57:06 +00003330// User lock allocator for dynamically dispatched indirect locks.
3331// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
3332// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3333// object is returned to the reusable pool of locks, unique to each lock type.
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003334kmp_indirect_lock_t *
3335__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3336{
3337 kmp_indirect_lock_t *lck;
3338 kmp_lock_index_t idx;
3339
3340 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3341
3342 if (__kmp_indirect_lock_pool[tag] != NULL) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003343 // Reuse the allocated and destroyed lock object
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003344 lck = __kmp_indirect_lock_pool[tag];
3345 if (OMP_LOCK_T_SIZE < sizeof(void *))
3346 idx = lck->lock->pool.index;
3347 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
Jonathan Peytondae13d82015-12-11 21:57:06 +00003348 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003349 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003350 idx = __kmp_i_lock_table.next;
3351 // Check capacity and double the size if it is full
3352 if (idx == __kmp_i_lock_table.size) {
3353 // Double up the space for block pointers
3354 int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3355 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3356 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3357 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3358 __kmp_free(old_table);
3359 // Allocate new objects in the new blocks
3360 for (int i = row; i < 2*row; ++i)
3361 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3362 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3363 __kmp_i_lock_table.size = 2*idx;
3364 }
3365 __kmp_i_lock_table.next++;
3366 lck = KMP_GET_I_LOCK(idx);
3367 // Allocate a new base lock object
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003368 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003369 KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003370 }
3371
3372 __kmp_release_lock(&__kmp_global_lock, gtid);
3373
3374 lck->type = tag;
3375
3376 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3377 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3378 } else {
3379 *((kmp_indirect_lock_t **)user_lock) = lck;
3380 }
3381
3382 return lck;
3383}
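// A minimal sketch of what the user's lock variable now holds.  When
// omp_lock_t cannot hold a pointer (OMP_LOCK_T_SIZE < sizeof(void *)), it
// stores the table index shifted left by one, keeping the value even and thus
// distinguishable from the odd tags of direct locks; otherwise it stores the
// kmp_indirect_lock_t pointer itself.  A lookup reverses that encoding,
// roughly:
//
//   // small lock word:    idx = *(kmp_lock_index_t *)user_lock >> 1, then index the table
//   // pointer-sized word: lck = *(kmp_indirect_lock_t **)user_lock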
3384
3385// User lock lookup for dynamically dispatched locks.
3386static __forceinline
3387kmp_indirect_lock_t *
3388__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3389{
3390 if (__kmp_env_consistency_check) {
3391 kmp_indirect_lock_t *lck = NULL;
3392 if (user_lock == NULL) {
3393 KMP_FATAL(LockIsUninitialized, func);
3394 }
3395 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003396 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00003397 if (idx >= __kmp_i_lock_table.size) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003398 KMP_FATAL(LockIsUninitialized, func);
3399 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00003400 lck = KMP_GET_I_LOCK(idx);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003401 } else {
3402 lck = *((kmp_indirect_lock_t **)user_lock);
3403 }
3404 if (lck == NULL) {
3405 KMP_FATAL(LockIsUninitialized, func);
3406 }
Jonathan Peyton61118492016-05-20 19:03:38 +00003407 return lck;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003408 } else {
3409 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003410 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003411 } else {
3412 return *((kmp_indirect_lock_t **)user_lock);
3413 }
3414 }
3415}
3416
3417static void
3418__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3419{
3420#if KMP_USE_ADAPTIVE_LOCKS
3421 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3422 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3423 seq = lockseq_queuing;
3424 }
3425#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00003426#if KMP_USE_TSX
3427 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3428 seq = lockseq_queuing;
3429 }
3430#endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003431 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003432 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003433 KMP_I_LOCK_FUNC(l, init)(l->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003434 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003435}
3436
3437static void
3438__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3439{
3440 kmp_uint32 gtid = __kmp_entry_gtid();
3441 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003442 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003443 kmp_indirect_locktag_t tag = l->type;
3444
3445 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3446
3447 // Use the base lock's space to keep the pool chain.
3448 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3449 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003450 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003451 }
3452 __kmp_indirect_lock_pool[tag] = l;
3453
3454 __kmp_release_lock(&__kmp_global_lock, gtid);
3455}
3456
3457static void
3458__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3459{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003460 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3461 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003462}
3463
Jonathan Peytona03533d2015-12-11 21:49:08 +00003464static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003465__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3466{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003467 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003468 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003469}
3470
3471static int
3472__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3473{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003474 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3475 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003476}
3477
3478static void
3479__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3480{
3481 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003482 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003483}
3484
Jonathan Peytona03533d2015-12-11 21:49:08 +00003485static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003486__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3487{
3488 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
Jonathan Peytona03533d2015-12-11 21:49:08 +00003489 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003490}
3491
3492static int
3493__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3494{
3495 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003496 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003497}
3498
3499kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3500
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003501// This is used only in kmp_error.c when consistency checking is on.
3502kmp_int32
3503__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3504{
3505 switch (seq) {
3506 case lockseq_tas:
3507 case lockseq_nested_tas:
3508 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003509#if KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003510 case lockseq_futex:
3511 case lockseq_nested_futex:
3512 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3513#endif
3514 case lockseq_ticket:
3515 case lockseq_nested_ticket:
3516 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3517 case lockseq_queuing:
3518 case lockseq_nested_queuing:
3519#if KMP_USE_ADAPTIVE_LOCKS
3520 case lockseq_adaptive:
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003521#endif
Jonathan Peyton1d46d972016-03-30 21:50:59 +00003522 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003523 case lockseq_drdpa:
3524 case lockseq_nested_drdpa:
3525 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3526 default:
3527 return 0;
3528 }
3529}
3530
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003531// Initializes data for dynamic user locks.
3532void
3533__kmp_init_dynamic_user_locks()
3534{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003535 // Initialize jump table for the lock functions
3536 if (__kmp_env_consistency_check) {
3537 __kmp_direct_set = direct_set_check;
3538 __kmp_direct_unset = direct_unset_check;
3539 __kmp_direct_test = direct_test_check;
3540 __kmp_indirect_set = indirect_set_check;
3541 __kmp_indirect_unset = indirect_unset_check;
3542 __kmp_indirect_test = indirect_test_check;
3543 }
3544 else {
3545 __kmp_direct_set = direct_set;
3546 __kmp_direct_unset = direct_unset;
3547 __kmp_direct_test = direct_test;
3548 __kmp_indirect_set = indirect_set;
3549 __kmp_indirect_unset = indirect_unset;
3550 __kmp_indirect_test = indirect_test;
3551 }
3552
3553 // Initialize lock index table
Jonathan Peytondae13d82015-12-11 21:57:06 +00003554 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3555 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3556 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
Jonathan Peyton61118492016-05-20 19:03:38 +00003557 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
Jonathan Peytondae13d82015-12-11 21:57:06 +00003558 __kmp_i_lock_table.next = 0;
3559
3560 // Indirect lock size
3561 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3562 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3563#if KMP_USE_ADAPTIVE_LOCKS
3564 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3565#endif
3566 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3567#if KMP_USE_TSX
3568 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3569#endif
3570 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3571#if KMP_USE_FUTEX
3572 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3573#endif
3574 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3575 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3576 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003577
3578 // Initialize lock accessor/modifier
Jonathan Peytondae13d82015-12-11 21:57:06 +00003579#define fill_jumps(table, expand, sep) { \
3580 table[locktag##sep##ticket] = expand(ticket); \
3581 table[locktag##sep##queuing] = expand(queuing); \
3582 table[locktag##sep##drdpa] = expand(drdpa); \
3583}
3584
3585#if KMP_USE_ADAPTIVE_LOCKS
3586# define fill_table(table, expand) { \
3587 fill_jumps(table, expand, _); \
3588 table[locktag_adaptive] = expand(queuing); \
3589 fill_jumps(table, expand, _nested_); \
3590}
3591#else
3592# define fill_table(table, expand) { \
3593 fill_jumps(table, expand, _); \
3594 fill_jumps(table, expand, _nested_); \
3595}
3596#endif // KMP_USE_ADAPTIVE_LOCKS
3597
3598#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3599 fill_table(__kmp_indirect_set_location, expand);
3600#undef expand
3601#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3602 fill_table(__kmp_indirect_set_flags, expand);
3603#undef expand
3604#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3605 fill_table(__kmp_indirect_get_location, expand);
3606#undef expand
3607#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3608 fill_table(__kmp_indirect_get_flags, expand);
3609#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003610
3611 __kmp_init_user_locks = TRUE;
3612}
3613
3614// Clean up the lock table.
3615void
3616__kmp_cleanup_indirect_user_locks()
3617{
3618 kmp_lock_index_t i;
3619 int k;
3620
3621 // Clean up locks in the pools first (they were already destroyed before going into the pools).
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003622 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003623 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3624 while (l != NULL) {
3625 kmp_indirect_lock_t *ll = l;
3626 l = (kmp_indirect_lock_t *)l->lock->pool.next;
Jonathan Peytondae13d82015-12-11 21:57:06 +00003627 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003628 __kmp_free(ll->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003629 ll->lock = NULL;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003630 }
Jonathan Peyton3bd88d42016-01-22 19:16:14 +00003631 __kmp_indirect_lock_pool[k] = NULL;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003632 }
3633 // Clean up the remaining undestroyed locks.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003634 for (i = 0; i < __kmp_i_lock_table.next; i++) {
3635 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3636 if (l->lock != NULL) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003637 // Locks not destroyed explicitly need to be destroyed here.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003638 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003639 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003640 __kmp_free(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003641 }
3642 }
3643 // Free the table
Jonathan Peytondae13d82015-12-11 21:57:06 +00003644 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3645 __kmp_free(__kmp_i_lock_table.table[i]);
3646 __kmp_free(__kmp_i_lock_table.table);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003647
3648 __kmp_init_user_locks = FALSE;
3649}
3650
3651enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3652int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3653
3654#else // KMP_USE_DYNAMIC_LOCK
3655
Jim Cownie5e8470a2013-09-27 10:38:44 +00003656/* ------------------------------------------------------------------------ */
3657/* user locks
3658 *
3659 * They are implemented as a table of function pointers which are set to the
3660 * lock functions of the appropriate kind, once that has been determined.
3661 */
3662
3663enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3664
3665size_t __kmp_base_user_lock_size = 0;
3666size_t __kmp_user_lock_size = 0;
3667
3668kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003669int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003670
3671int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003672int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003673void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3674void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3675void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003676int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003677
3678int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003679int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003680void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3681void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3682
3683int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3684const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3685void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3686kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3687void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3688
3689void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3690{
3691 switch ( user_lock_kind ) {
3692 case lk_default:
3693 default:
3694 KMP_ASSERT( 0 );
3695
3696 case lk_tas: {
3697 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3698 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3699
3700 __kmp_get_user_lock_owner_ =
3701 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3702 ( &__kmp_get_tas_lock_owner );
3703
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003704 if ( __kmp_env_consistency_check ) {
3705 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3706 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3707 }
3708 else {
3709 KMP_BIND_USER_LOCK(tas);
3710 KMP_BIND_NESTED_USER_LOCK(tas);
3711 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003712
3713 __kmp_destroy_user_lock_ =
3714 ( void ( * )( kmp_user_lock_p ) )
3715 ( &__kmp_destroy_tas_lock );
3716
Jim Cownie5e8470a2013-09-27 10:38:44 +00003717 __kmp_is_user_lock_initialized_ =
3718 ( int ( * )( kmp_user_lock_p ) ) NULL;
3719
3720 __kmp_get_user_lock_location_ =
3721 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3722
3723 __kmp_set_user_lock_location_ =
3724 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3725
3726 __kmp_get_user_lock_flags_ =
3727 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3728
3729 __kmp_set_user_lock_flags_ =
3730 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3731 }
3732 break;
3733
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003734#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00003735
3736 case lk_futex: {
3737 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3738 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3739
3740 __kmp_get_user_lock_owner_ =
3741 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3742 ( &__kmp_get_futex_lock_owner );
3743
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003744 if ( __kmp_env_consistency_check ) {
3745 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3746 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3747 }
3748 else {
3749 KMP_BIND_USER_LOCK(futex);
3750 KMP_BIND_NESTED_USER_LOCK(futex);
3751 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003752
3753 __kmp_destroy_user_lock_ =
3754 ( void ( * )( kmp_user_lock_p ) )
3755 ( &__kmp_destroy_futex_lock );
3756
Jim Cownie5e8470a2013-09-27 10:38:44 +00003757 __kmp_is_user_lock_initialized_ =
3758 ( int ( * )( kmp_user_lock_p ) ) NULL;
3759
3760 __kmp_get_user_lock_location_ =
3761 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3762
3763 __kmp_set_user_lock_location_ =
3764 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3765
3766 __kmp_get_user_lock_flags_ =
3767 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3768
3769 __kmp_set_user_lock_flags_ =
3770 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3771 }
3772 break;
3773
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003774#endif // KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00003775
3776 case lk_ticket: {
3777 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3778 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3779
3780 __kmp_get_user_lock_owner_ =
3781 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3782 ( &__kmp_get_ticket_lock_owner );
3783
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003784 if ( __kmp_env_consistency_check ) {
3785 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3786 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3787 }
3788 else {
3789 KMP_BIND_USER_LOCK(ticket);
3790 KMP_BIND_NESTED_USER_LOCK(ticket);
3791 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003792
3793 __kmp_destroy_user_lock_ =
3794 ( void ( * )( kmp_user_lock_p ) )
3795 ( &__kmp_destroy_ticket_lock );
3796
Jim Cownie5e8470a2013-09-27 10:38:44 +00003797 __kmp_is_user_lock_initialized_ =
3798 ( int ( * )( kmp_user_lock_p ) )
3799 ( &__kmp_is_ticket_lock_initialized );
3800
3801 __kmp_get_user_lock_location_ =
3802 ( const ident_t * ( * )( kmp_user_lock_p ) )
3803 ( &__kmp_get_ticket_lock_location );
3804
3805 __kmp_set_user_lock_location_ =
3806 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3807 ( &__kmp_set_ticket_lock_location );
3808
3809 __kmp_get_user_lock_flags_ =
3810 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3811 ( &__kmp_get_ticket_lock_flags );
3812
3813 __kmp_set_user_lock_flags_ =
3814 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3815 ( &__kmp_set_ticket_lock_flags );
3816 }
3817 break;
3818
3819 case lk_queuing: {
3820 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3821 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3822
3823 __kmp_get_user_lock_owner_ =
3824 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3825 ( &__kmp_get_queuing_lock_owner );
3826
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003827 if ( __kmp_env_consistency_check ) {
3828 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3829 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3830 }
3831 else {
3832 KMP_BIND_USER_LOCK(queuing);
3833 KMP_BIND_NESTED_USER_LOCK(queuing);
3834 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003835
3836 __kmp_destroy_user_lock_ =
3837 ( void ( * )( kmp_user_lock_p ) )
3838 ( &__kmp_destroy_queuing_lock );
3839
Jim Cownie5e8470a2013-09-27 10:38:44 +00003840 __kmp_is_user_lock_initialized_ =
3841 ( int ( * )( kmp_user_lock_p ) )
3842 ( &__kmp_is_queuing_lock_initialized );
3843
3844 __kmp_get_user_lock_location_ =
3845 ( const ident_t * ( * )( kmp_user_lock_p ) )
3846 ( &__kmp_get_queuing_lock_location );
3847
3848 __kmp_set_user_lock_location_ =
3849 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3850 ( &__kmp_set_queuing_lock_location );
3851
3852 __kmp_get_user_lock_flags_ =
3853 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3854 ( &__kmp_get_queuing_lock_flags );
3855
3856 __kmp_set_user_lock_flags_ =
3857 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3858 ( &__kmp_set_queuing_lock_flags );
3859 }
3860 break;
3861
3862#if KMP_USE_ADAPTIVE_LOCKS
3863 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003864 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3865 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003866
3867 __kmp_get_user_lock_owner_ =
3868 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3869 ( &__kmp_get_queuing_lock_owner );
3870
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003871 if ( __kmp_env_consistency_check ) {
3872 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3873 }
3874 else {
3875 KMP_BIND_USER_LOCK(adaptive);
3876 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003877
3878 __kmp_destroy_user_lock_ =
3879 ( void ( * )( kmp_user_lock_p ) )
3880 ( &__kmp_destroy_adaptive_lock );
3881
3882 __kmp_is_user_lock_initialized_ =
3883 ( int ( * )( kmp_user_lock_p ) )
3884 ( &__kmp_is_queuing_lock_initialized );
3885
3886 __kmp_get_user_lock_location_ =
3887 ( const ident_t * ( * )( kmp_user_lock_p ) )
3888 ( &__kmp_get_queuing_lock_location );
3889
3890 __kmp_set_user_lock_location_ =
3891 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3892 ( &__kmp_set_queuing_lock_location );
3893
3894 __kmp_get_user_lock_flags_ =
3895 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3896 ( &__kmp_get_queuing_lock_flags );
3897
3898 __kmp_set_user_lock_flags_ =
3899 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3900 ( &__kmp_set_queuing_lock_flags );
3901
3902 }
3903 break;
3904#endif // KMP_USE_ADAPTIVE_LOCKS
3905
3906 case lk_drdpa: {
3907 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3908 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3909
3910 __kmp_get_user_lock_owner_ =
3911 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3912 ( &__kmp_get_drdpa_lock_owner );
3913
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003914 if ( __kmp_env_consistency_check ) {
3915 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3916 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3917 }
3918 else {
3919 KMP_BIND_USER_LOCK(drdpa);
3920 KMP_BIND_NESTED_USER_LOCK(drdpa);
3921 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003922
3923 __kmp_destroy_user_lock_ =
3924 ( void ( * )( kmp_user_lock_p ) )
3925 ( &__kmp_destroy_drdpa_lock );
3926
Jim Cownie5e8470a2013-09-27 10:38:44 +00003927 __kmp_is_user_lock_initialized_ =
3928 ( int ( * )( kmp_user_lock_p ) )
3929 ( &__kmp_is_drdpa_lock_initialized );
3930
3931 __kmp_get_user_lock_location_ =
3932 ( const ident_t * ( * )( kmp_user_lock_p ) )
3933 ( &__kmp_get_drdpa_lock_location );
3934
3935 __kmp_set_user_lock_location_ =
3936 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3937 ( &__kmp_set_drdpa_lock_location );
3938
3939 __kmp_get_user_lock_flags_ =
3940 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3941 ( &__kmp_get_drdpa_lock_flags );
3942
3943 __kmp_set_user_lock_flags_ =
3944 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3945 ( &__kmp_set_drdpa_lock_flags );
3946 }
3947 break;
3948 }
3949}
3950
3951
3952// ----------------------------------------------------------------------------
3953// User lock table & lock allocation
3954
3955kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3956kmp_user_lock_p __kmp_lock_pool = NULL;
3957
3958// Lock block-allocation support.
3959kmp_block_of_locks* __kmp_lock_blocks = NULL;
3960int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3961
3962static kmp_lock_index_t
3963__kmp_lock_table_insert( kmp_user_lock_p lck )
3964{
3965 // Assume that kmp_global_lock is held upon entry/exit.
3966 kmp_lock_index_t index;
3967 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3968 kmp_lock_index_t size;
3969 kmp_user_lock_p *table;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003970 // Reallocate lock table.
3971 if ( __kmp_user_lock_table.allocated == 0 ) {
3972 size = 1024;
3973 }
3974 else {
3975 size = __kmp_user_lock_table.allocated * 2;
3976 }
3977 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003978 KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003979 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
Jonathan Peyton1406f012015-05-22 22:35:51 +00003980 // We cannot free the previous table now, since it may be in use by other
Jim Cownie5e8470a2013-09-27 10:38:44 +00003981 // threads. So save the pointer to the previous table in the first element of the
3982 // new table. All the tables are organized into a list and can be freed when
3983 // the library shuts down.
3984 __kmp_user_lock_table.table = table;
3985 __kmp_user_lock_table.allocated = size;
3986 }
3987 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3988 index = __kmp_user_lock_table.used;
3989 __kmp_user_lock_table.table[ index ] = lck;
3990 ++ __kmp_user_lock_table.used;
3991 return index;
3992}
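// A sketch of the layout after repeated reallocation: element [0] of each
// table points at the previous (smaller) table, so the generations form a
// singly linked list that __kmp_cleanup_user_locks() walks and frees, while
// live locks always start at element [1].
//
//   __kmp_user_lock_table.table -> [ prev | lck_1 | lck_2 | ... ]
//                                     |
//                                     +--> [ prev | ... ] --> ... --> NULL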
3993
3994static kmp_user_lock_p
3995__kmp_lock_block_allocate()
3996{
3997 // Assume that kmp_global_lock is held upon entry/exit.
3998 static int last_index = 0;
3999 if ( ( last_index >= __kmp_num_locks_in_block )
4000 || ( __kmp_lock_blocks == NULL ) ) {
4001 // Restart the index.
4002 last_index = 0;
4003 // Need to allocate a new block.
4004 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4005 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4006 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4007 // Set up the new block.
4008 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4009 new_block->next_block = __kmp_lock_blocks;
4010 new_block->locks = (void *)buffer;
4011 // Publish the new block.
4012 KMP_MB();
4013 __kmp_lock_blocks = new_block;
4014 }
4015 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4016 [ last_index * __kmp_user_lock_size ] ) );
4017 last_index++;
4018 return ret;
4019}
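// A sketch of the memory layout produced here: a single __kmp_allocate() call
// provides space for __kmp_num_locks_in_block locks plus the kmp_block_of_locks
// header, which is placed after the lock storage:
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//             ^ new_block->locks                 ^ new_block
//
// This is why __kmp_cleanup_user_locks() frees block_ptr->locks (the whole
// buffer) and never frees block_ptr itself.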
4020
4021//
4022// Get memory for a lock. It may be freshly allocated memory or reused memory
4023 // from the lock pool.
4024//
4025kmp_user_lock_p
4026__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4027 kmp_lock_flags_t flags )
4028{
4029 kmp_user_lock_p lck;
4030 kmp_lock_index_t index;
4031 KMP_DEBUG_ASSERT( user_lock );
4032
4033 __kmp_acquire_lock( &__kmp_global_lock, gtid );
4034
4035 if ( __kmp_lock_pool == NULL ) {
4036 // Lock pool is empty. Allocate new memory.
4037 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4038 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4039 }
4040 else {
4041 lck = __kmp_lock_block_allocate();
4042 }
4043
4044 // Insert lock in the table so that it can be freed in __kmp_cleanup,
4045 // and the debugger has info on all allocated locks.
4046 index = __kmp_lock_table_insert( lck );
4047 }
4048 else {
4049 // Pick up lock from pool.
4050 lck = __kmp_lock_pool;
4051 index = __kmp_lock_pool->pool.index;
4052 __kmp_lock_pool = __kmp_lock_pool->pool.next;
4053 }
4054
4055 //
4056 // We could potentially differentiate between nested and regular locks
4057 // here, and do the lock table lookup for regular locks only.
4058 //
4059 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4060 * ( (kmp_lock_index_t *) user_lock ) = index;
4061 }
4062 else {
4063 * ( (kmp_user_lock_p *) user_lock ) = lck;
4064 }
4065
4066 // mark the lock if it is a critical section lock.
4067 __kmp_set_user_lock_flags( lck, flags );
4068
4069 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
4070
4071 return lck;
4072}
4073
4074// Put the lock's memory into the pool for reuse.
4075void
4076__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4077{
Jim Cownie5e8470a2013-09-27 10:38:44 +00004078 KMP_DEBUG_ASSERT( user_lock != NULL );
4079 KMP_DEBUG_ASSERT( lck != NULL );
4080
4081 __kmp_acquire_lock( & __kmp_global_lock, gtid );
4082
4083 lck->pool.next = __kmp_lock_pool;
4084 __kmp_lock_pool = lck;
4085 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4086 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4087 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4088 lck->pool.index = index;
4089 }
4090
4091 __kmp_release_lock( & __kmp_global_lock, gtid );
4092}
4093
4094kmp_user_lock_p
4095__kmp_lookup_user_lock( void **user_lock, char const *func )
4096{
4097 kmp_user_lock_p lck = NULL;
4098
4099 if ( __kmp_env_consistency_check ) {
4100 if ( user_lock == NULL ) {
4101 KMP_FATAL( LockIsUninitialized, func );
4102 }
4103 }
4104
4105 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4106 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4107 if ( __kmp_env_consistency_check ) {
4108 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4109 KMP_FATAL( LockIsUninitialized, func );
4110 }
4111 }
4112 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4113 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4114 lck = __kmp_user_lock_table.table[index];
4115 }
4116 else {
4117 lck = *( (kmp_user_lock_p *)user_lock );
4118 }
4119
4120 if ( __kmp_env_consistency_check ) {
4121 if ( lck == NULL ) {
4122 KMP_FATAL( LockIsUninitialized, func );
4123 }
4124 }
4125
4126 return lck;
4127}
4128
4129void
4130__kmp_cleanup_user_locks( void )
4131{
4132 //
4133 // Reset lock pool. Do not worry about locks in the pool -- we will free
4134 // them when iterating through lock table (it includes all the locks,
4135 // dead or alive).
4136 //
4137 __kmp_lock_pool = NULL;
4138
4139#define IS_CRITICAL(lck) \
4140 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4141 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4142
4143 //
4144 // Loop through lock table, free all locks.
4145 //
4146 // Do not free item [0]; it is reserved for the lock tables list.
4147 //
4148 // FIXME - we are iterating through a list of (pointers to) objects of
4149 // type union kmp_user_lock, but we have no way of knowing whether the
4150 // base type is currently "pool" or whatever the global user lock type
4151 // is.
4152 //
4153 // We are relying on the fact that for all of the user lock types
4154 // (except "tas"), the first field in the lock struct is the "initialized"
4155 // field, which is set to the address of the lock object itself when
4156 // the lock is initialized. When the union is of type "pool", the
4157 // first field is a pointer to the next object in the free list, which
4158 // will not be the same address as the object itself.
4159 //
4160 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4161 // will fail for "pool" objects on the free list. This must happen as
4162 // the "location" field of real user locks overlaps the "index" field
4163 // of "pool" objects.
4164 //
4165 // It would be better to run through the free list, and remove all "pool"
4166 // objects from the lock table before executing this loop. However,
4167 // "pool" objects do not always have their index field set (only on
4168 // lin_32e), and I don't want to search the lock table for the address
4169 // of every "pool" object on the free list.
4170 //
4171 while ( __kmp_user_lock_table.used > 1 ) {
4172 const ident *loc;
4173
4174 //
4175 // reduce __kmp_user_lock_table.used before freeing the lock,
4176 // so that state of locks is consistent
4177 //
4178 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4179 --__kmp_user_lock_table.used ];
4180
4181 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4182 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4183 //
4184 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4185 // initialized AND it is NOT a critical section (user is not
4186 // responsible for destroying criticals) AND we know source
4187 // location to report.
4188 //
4189 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4190 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4191 ( loc->psource != NULL ) ) {
4192 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
Andrey Churbanove8595de2015-02-20 18:19:41 +00004193 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004194 __kmp_str_loc_free( &str_loc);
4195 }
4196
4197#ifdef KMP_DEBUG
4198 if ( IS_CRITICAL( lck ) ) {
4199 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4200 }
4201 else {
4202 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4203 }
4204#endif // KMP_DEBUG
4205
4206 //
4207 // Cleanup internal lock dynamic resources
4208 // (for drdpa locks particularly).
4209 //
4210 __kmp_destroy_user_lock( lck );
4211 }
4212
4213 //
4214 // Free the lock if block allocation of locks is not used.
4215 //
4216 if ( __kmp_lock_blocks == NULL ) {
4217 __kmp_free( lck );
4218 }
4219 }
4220
4221#undef IS_CRITICAL
4222
4223 //
4224 // delete lock table(s).
4225 //
4226 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4227 __kmp_user_lock_table.table = NULL;
4228 __kmp_user_lock_table.allocated = 0;
4229
4230 while ( table_ptr != NULL ) {
4231 //
4232 // In the first element we saved the pointer to the previous
4233 // (smaller) lock table.
4234 //
4235 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4236 __kmp_free( table_ptr );
4237 table_ptr = next;
4238 }
4239
4240 //
4241 // Free buffers allocated for blocks of locks.
4242 //
4243 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4244 __kmp_lock_blocks = NULL;
4245
4246 while ( block_ptr != NULL ) {
4247 kmp_block_of_locks_t *next = block_ptr->next_block;
4248 __kmp_free( block_ptr->locks );
4249 //
4250 // *block_ptr itself was allocated at the end of the locks vector.
4251 //
4252 block_ptr = next;
4253 }
4254
4255 TCW_4(__kmp_init_user_locks, FALSE);
4256}
4257
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00004258#endif // KMP_USE_DYNAMIC_LOCK