/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32 x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}
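
// Illustrative note (not in the original source): the loop above exercises the
// wrap point of 32-bit unsigned arithmetic.  Starting from x = 0xFFFFFFFD and
// y = 0xFFFFFFFB, the eight iterations step both values across
// 0xFFFFFFFF -> 0x00000000, and modulo-2^32 subtraction still yields
//     (0x00000001 - 0xFFFFFFFF) mod 2^32 == 2,
// which is the wrap-around behavior the ticket (bakery) locks below rely on
// when comparing next_ticket against now_serving.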


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//
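//
// The core protocol of the test-and-set ("tas") lock below, reduced to a
// stand-alone sketch.  This is illustrative only and not part of the build
// (hence the #if 0); the real code additionally goes through the
// KMP_LOCK_FREE/KMP_LOCK_BUSY tag encoding, yield heuristics, exponential
// backoff, ITT/TSan annotations and the *_with_checks consistency checks.
// <atomic> is already included at the top of this file.
//
#if 0
struct mini_tas_lock {
    std::atomic<int> poll{ 0 };                     // 0 == free, gtid+1 == owner

    void acquire( int gtid ) {
        int free_val = 0;
        // Spin until the CAS from "free" to "owned by gtid" succeeds.
        while ( !poll.compare_exchange_weak( free_val, gtid + 1,
                                             std::memory_order_acquire ) ) {
            free_val = 0;                           // CAS overwrote the expected value
        }
    }
    bool try_acquire( int gtid ) {
        int free_val = 0;
        return poll.compare_exchange_strong( free_val, gtid + 1,
                                             std::memory_order_acquire );
    }
    void release() {
        poll.store( 0, std::memory_order_release ); // hand the lock back
    }
};
#endif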

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    kmp_backoff_t backoff = __kmp_spin_backoff_params;
    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {

        __kmp_spin_backoff(&backoff);
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_tas_lock_timed_template( lck, gtid );
    ANNOTATE_TAS_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    ANNOTATE_TAS_RELEASED(lck);
    KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//
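// Illustrative note (not in the original source): for the nested variants the
// owning thread may re-acquire the lock, and lk.depth_locked counts how many
// releases are still owed.  For a thread that already owns the lock:
//
//     acquire  -> depth_locked 1 -> 2   (returns KMP_LOCK_ACQUIRED_NEXT)
//     release  -> depth_locked 2 -> 1   (returns KMP_LOCK_STILL_HELD)
//     release  -> depth_locked 1 -> 0   (underlying lock released,
//                                        returns KMP_LOCK_RELEASED)
//
// A depth_locked value of -1 marks a lock that was initialized as a simple,
// non-nestable lock.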

int
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        ANNOTATE_TAS_ACQUIRED(lck);
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
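//
// A stand-alone sketch of the futex wait/wake pairing that the lock below is
// built on.  Illustrative only and not part of the build (hence the #if 0);
// the helper names are hypothetical.  The real lock stores (gtid+1) << 1 in
// lk.poll and uses bit 0 as a "waiters may be sleeping" flag, so the releasing
// thread knows whether a FUTEX_WAKE is needed.  <unistd.h>, <sys/syscall.h>
// and the FUTEX_WAIT/FUTEX_WAKE constants are already available above.
//
#if 0
static void kmp_sketch_futex_wait( volatile kmp_int32 *addr, kmp_int32 expected )
{
    // Goes to sleep only if *addr still equals expected at the time of the
    // call, so a wake that races ahead of the wait cannot be lost.
    syscall( __NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0 );
}

static void kmp_sketch_futex_wake_one( volatile kmp_int32 *addr )
{
    // Wakes at most one thread blocked in FUTEX_WAIT on addr; this is what
    // __kmp_release_futex_lock() does when it finds bit 0 set.
    syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
}
#endif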

static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
      KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {

        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
          lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= KMP_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
              lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_futex_lock_timed_template( lck, gtid );
    ANNOTATE_FUTEX_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);
    ANNOTATE_FUTEX_RELEASED(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
      lck, gtid, poll_val ) );

    if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
          lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

int
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        ANNOTATE_FUTEX_ACQUIRED(lck);
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_USE_FUTEX


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */
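
//
// The ticket ("bakery") protocol implemented below, reduced to a stand-alone
// sketch.  Illustrative only and not part of the build (hence the #if 0); the
// real lock adds owner_id/depth_locked bookkeeping, the *_with_checks
// consistency checks, and KMP_WAIT_YIELD-based spinning instead of a raw loop.
// <atomic> is already included at the top of this file.
//
#if 0
struct mini_ticket_lock {
    std::atomic<unsigned> next_ticket{ 0 };   // next ticket to hand out
    std::atomic<unsigned> now_serving{ 0 };   // ticket currently being served

    void acquire() {
        // Take a ticket, then wait until that ticket is being served.
        unsigned my_ticket = next_ticket.fetch_add( 1, std::memory_order_relaxed );
        while ( now_serving.load( std::memory_order_acquire ) != my_ticket )
            ;                                 // the real code yields here
    }
    void release() {
        // Admit the next ticket holder; FIFO order makes the lock fair.
        now_serving.fetch_add( 1, std::memory_order_release );
    }
};
#endif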

static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
}

static kmp_uint32
__kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
{
    return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );

#ifdef USE_LOCK_PROFILE
    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
        return KMP_LOCK_ACQUIRED_FIRST;
    }
    KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_ticket_lock_timed_template( lck, gtid );
    ANNOTATE_TICKET_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );

    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
          &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
    }
    return retval;
}

int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );

    ANNOTATE_TICKET_RELEASED(lck);
    std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    lck->lk.self = lck;
    std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
    std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
    std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
    lck->lk.self = NULL;
    lck->lk.location = NULL;
    std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

int
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        ANNOTATE_TICKET_ACQUIRED(lck);
        std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
        retval = 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
                                           kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
        std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *               UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                              h, h  means lock is held or about to transition, 1 element on queue
 *                              h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */
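
/*
 * Illustrative trace (not in the original source), following the transitions
 * above for two threads whose gtid+1 values are 2 and 3:
 *
 *     thread 2 acquires (0,0)  -> (-1,0)   thread 2 holds the lock, queue empty
 *     thread 3 acquires (-1,0) -> ( 3,3)   thread 3 enqueues itself and spins
 *     thread 2 releases ( 3,3) -> (-1,0)   thread 3 is dequeued and now holds the lock
 *     thread 3 releases (-1,0) -> ( 0,0)   lock is free again
 */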
1169
1170#ifdef DEBUG_QUEUING_LOCKS
1171
1172/* Stuff for circular trace buffer */
1173#define TRACE_BUF_ELE 1024
1174static char traces[TRACE_BUF_ELE][128] = { 0 }
1175static int tc = 0;
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00001176#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1177#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1178#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001179
1180static void
1181__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1182 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1183{
1184 kmp_int32 t, i;
1185
1186 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1187
1188 i = tc % TRACE_BUF_ELE;
1189 __kmp_printf_no_lock( "%s\n", traces[i] );
1190 i = (i+1) % TRACE_BUF_ELE;
1191 while ( i != (tc % TRACE_BUF_ELE) ) {
1192 __kmp_printf_no_lock( "%s", traces[i] );
1193 i = (i+1) % TRACE_BUF_ELE;
1194 }
1195 __kmp_printf_no_lock( "\n" );
1196
1197 __kmp_printf_no_lock(
1198 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1199 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1200 head_id, tail_id );
1201
1202 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1203
1204 if ( lck->lk.head_id >= 1 ) {
1205 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1206 while (t > 0) {
1207 __kmp_printf_no_lock( "-> %d ", t );
1208 t = __kmp_threads[t-1]->th.th_next_waiting;
1209 }
1210 }
1211 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1212 __kmp_printf_no_lock( "\n\n" );
1213}
1214
1215#endif /* DEBUG_QUEUING_LOCKS */
1216
1217static kmp_int32
1218__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1219{
1220 return TCR_4( lck->lk.owner_id ) - 1;
1221}
1222
1223static inline bool
1224__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1225{
1226 return lck->lk.depth_locked != -1;
1227}
1228
1229/* Acquire a lock using a the queuing lock implementation */
1230template <bool takeTime>
1231/* [TLW] The unused template above is left behind because of what BEB believes is a
1232 potential compiler problem with __forceinline. */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001233__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001234__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1235 kmp_int32 gtid )
1236{
1237 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1238 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1239 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1240 volatile kmp_uint32 *spin_here_p;
1241 kmp_int32 need_mf = 1;
1242
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001243#if OMPT_SUPPORT
1244 ompt_state_t prev_state = ompt_state_undefined;
1245#endif
1246
Jim Cownie5e8470a2013-09-27 10:38:44 +00001247 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1248
1249 KMP_FSYNC_PREPARE( lck );
1250 KMP_DEBUG_ASSERT( this_thr != NULL );
1251 spin_here_p = & this_thr->th.th_spin_here;
1252
1253#ifdef DEBUG_QUEUING_LOCKS
1254 TRACE_LOCK( gtid+1, "acq ent" );
1255 if ( *spin_here_p )
1256 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1257 if ( this_thr->th.th_next_waiting != 0 )
1258 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1259#endif
1260 KMP_DEBUG_ASSERT( !*spin_here_p );
1261 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1262
1263
1264 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1265 that may follow, not just in execution order, but also in visibility order. This way,
1266 when a releasing thread observes the changes to the queue by this thread, it can
1267 rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1268 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1269 to FALSE before this thread sets it to TRUE, this thread will hang.
1270 */
1271 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1272
1273 while( 1 ) {
1274 kmp_int32 enqueued;
1275 kmp_int32 head;
1276 kmp_int32 tail;
1277
1278 head = *head_id_p;
1279
1280 switch ( head ) {
1281
1282 case -1:
1283 {
1284#ifdef DEBUG_QUEUING_LOCKS
1285 tail = *tail_id_p;
1286 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287#endif
1288 tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1289 this assignment prevents us from entering the if ( t > 0 )
1290 condition in the enqueued case below, which is not necessary for
1291 this state transition */
1292
1293 need_mf = 0;
1294 /* try (-1,0)->(tid,tid) */
1295 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1296 KMP_PACK_64( -1, 0 ),
1297 KMP_PACK_64( gtid+1, gtid+1 ) );
1298#ifdef DEBUG_QUEUING_LOCKS
1299 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1300#endif
1301 }
1302 break;
1303
1304 default:
1305 {
1306 tail = *tail_id_p;
1307 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1308
1309#ifdef DEBUG_QUEUING_LOCKS
1310 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311#endif
1312
1313 if ( tail == 0 ) {
1314 enqueued = FALSE;
1315 }
1316 else {
1317 need_mf = 0;
1318 /* try (h,t) or (h,h)->(h,tid) */
1319 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1320
1321#ifdef DEBUG_QUEUING_LOCKS
1322 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1323#endif
1324 }
1325 }
1326 break;
1327
1328 case 0: /* empty queue */
1329 {
1330 kmp_int32 grabbed_lock;
1331
1332#ifdef DEBUG_QUEUING_LOCKS
1333 tail = *tail_id_p;
1334 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1335#endif
1336 /* try (0,0)->(-1,0) */
1337
1338 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1339 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1340
1341 if ( grabbed_lock ) {
1342
1343 *spin_here_p = FALSE;
1344
1345 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1346 lck, gtid ));
1347#ifdef DEBUG_QUEUING_LOCKS
1348 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1349#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001350
1351#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001352 if (ompt_enabled && prev_state != ompt_state_undefined) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001353 /* change the state before clearing wait_id */
1354 this_thr->th.ompt_thread_info.state = prev_state;
1355 this_thr->th.ompt_thread_info.wait_id = 0;
1356 }
1357#endif
1358
Jim Cownie5e8470a2013-09-27 10:38:44 +00001359 KMP_FSYNC_ACQUIRED( lck );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001360 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001361 }
1362 enqueued = FALSE;
1363 }
1364 break;
1365 }
1366
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001367#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001368 if (ompt_enabled && prev_state == ompt_state_undefined) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001369 /* this thread will spin; set wait_id before entering wait state */
1370 prev_state = this_thr->th.ompt_thread_info.state;
1371 this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1372 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1373 }
1374#endif
1375
Jim Cownie5e8470a2013-09-27 10:38:44 +00001376 if ( enqueued ) {
1377 if ( tail > 0 ) {
1378 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1379 KMP_ASSERT( tail_thr != NULL );
1380 tail_thr->th.th_next_waiting = gtid+1;
1381 /* corresponding wait for this write in release code */
1382 }
1383 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1384
1385
1386        /* ToDo: May want to consider using __kmp_wait_sleep, or something similar
1387         * that sleeps for throughput only, here.
1388         */
1389 KMP_MB();
1390 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1391
1392#ifdef DEBUG_QUEUING_LOCKS
1393 TRACE_LOCK( gtid+1, "acq spin" );
1394
1395 if ( this_thr->th.th_next_waiting != 0 )
1396 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1397#endif
1398 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1399 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1400 lck, gtid ));
1401
1402#ifdef DEBUG_QUEUING_LOCKS
1403 TRACE_LOCK( gtid+1, "acq exit 2" );
1404#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001405
1406#if OMPT_SUPPORT
1407 /* change the state before clearing wait_id */
1408 this_thr->th.ompt_thread_info.state = prev_state;
1409 this_thr->th.ompt_thread_info.wait_id = 0;
1410#endif
1411
Jim Cownie5e8470a2013-09-27 10:38:44 +00001412 /* got lock, we were dequeued by the thread that released lock */
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001413 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001414 }
1415
1416 /* Yield if number of threads > number of logical processors */
1417    /* ToDo: Not sure why this should only be in the oversubscription case;
1418       maybe it should be a traditional YIELD_INIT/YIELD_WHEN loop */
1419 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1420 __kmp_xproc ) );
1421#ifdef DEBUG_QUEUING_LOCKS
1422 TRACE_LOCK( gtid+1, "acq retry" );
1423#endif
1424
1425 }
1426 KMP_ASSERT2( 0, "should not get here" );
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001427 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001428}
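
// A minimal, self-contained sketch (not part of the build) of the packed
// 64-bit compare-and-swap idea used in the acquire path above: the
// (head_id, tail_id) pair is treated as one 64-bit word so that transitions
// such as (-1,0)->(tid,tid) are atomic.  std::atomic stands in for
// KMP_COMPARE_AND_STORE_ACQ64, and the names pack_ht/try_become_first_waiter
// are illustrative only; which half of the word holds head vs. tail depends
// on KMP_PACK_64 and the struct layout, so this sketch simply picks one.
#if 0
#include <atomic>
#include <cstdint>

static inline uint64_t pack_ht(uint32_t head, uint32_t tail) {
    return ((uint64_t)head << 32) | (uint64_t)tail;
}

// (-1,0) -> (gtid+1, gtid+1): the lock is held with an empty queue, and the
// caller tries to install itself as both head and tail of the wait queue.
static bool try_become_first_waiter(std::atomic<uint64_t> *packed_ht,
                                    int32_t gtid) {
    uint64_t expected = pack_ht((uint32_t)-1, 0u);
    uint64_t desired  = pack_ht((uint32_t)(gtid + 1), (uint32_t)(gtid + 1));
    return packed_ht->compare_exchange_strong(expected, desired,
                                              std::memory_order_acquire);
}
#endif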
1429
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001430int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001431__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1432{
1433 KMP_DEBUG_ASSERT( gtid >= 0 );
1434
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001435 int retval = __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1436 ANNOTATE_QUEUING_ACQUIRED(lck);
1437 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001438}
1439
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001440static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001441__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1442 kmp_int32 gtid )
1443{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001444 char const * const func = "omp_set_lock";
1445 if ( lck->lk.initialized != lck ) {
1446 KMP_FATAL( LockIsUninitialized, func );
1447 }
1448 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1449 KMP_FATAL( LockNestableUsedAsSimple, func );
1450 }
1451 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1452 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001453 }
1454
1455 __kmp_acquire_queuing_lock( lck, gtid );
1456
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001458 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001459}
1460
1461int
1462__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1463{
1464 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1465 kmp_int32 head;
1466#ifdef KMP_DEBUG
1467 kmp_info_t *this_thr;
1468#endif
1469
1470 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1471 KMP_DEBUG_ASSERT( gtid >= 0 );
1472#ifdef KMP_DEBUG
1473 this_thr = __kmp_thread_from_gtid( gtid );
1474 KMP_DEBUG_ASSERT( this_thr != NULL );
1475 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1476#endif
1477
1478 head = *head_id_p;
1479
1480 if ( head == 0 ) { /* nobody on queue, nobody holding */
1481
1482 /* try (0,0)->(-1,0) */
1483
1484 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1485 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1486 KMP_FSYNC_ACQUIRED(lck);
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001487 ANNOTATE_QUEUING_ACQUIRED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001488 return TRUE;
1489 }
1490 }
1491
1492 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1493 return FALSE;
1494}
1495
1496static int
1497__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1498{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001499 char const * const func = "omp_test_lock";
1500 if ( lck->lk.initialized != lck ) {
1501 KMP_FATAL( LockIsUninitialized, func );
1502 }
1503 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1504 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001505 }
1506
1507 int retval = __kmp_test_queuing_lock( lck, gtid );
1508
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001509 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00001510 lck->lk.owner_id = gtid + 1;
1511 }
1512 return retval;
1513}
1514
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001515int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001516__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1517{
1518 register kmp_info_t *this_thr;
1519 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1520 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1521
1522 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1523 KMP_DEBUG_ASSERT( gtid >= 0 );
1524 this_thr = __kmp_thread_from_gtid( gtid );
1525 KMP_DEBUG_ASSERT( this_thr != NULL );
1526#ifdef DEBUG_QUEUING_LOCKS
1527 TRACE_LOCK( gtid+1, "rel ent" );
1528
1529 if ( this_thr->th.th_spin_here )
1530 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1531 if ( this_thr->th.th_next_waiting != 0 )
1532 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1533#endif
1534 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1535 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1536
1537 KMP_FSYNC_RELEASING(lck);
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001538 ANNOTATE_QUEUING_RELEASED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001539
1540 while( 1 ) {
1541 kmp_int32 dequeued;
1542 kmp_int32 head;
1543 kmp_int32 tail;
1544
1545 head = *head_id_p;
1546
1547#ifdef DEBUG_QUEUING_LOCKS
1548 tail = *tail_id_p;
1549 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1550 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1551#endif
1552 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1553
1554 if ( head == -1 ) { /* nobody on queue */
1555
1556 /* try (-1,0)->(0,0) */
1557 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1558 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1559 lck, gtid ));
1560#ifdef DEBUG_QUEUING_LOCKS
1561 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1562#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001563
1564#if OMPT_SUPPORT
1565 /* nothing to do - no other thread is trying to shift blame */
1566#endif
1567
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001568 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001569 }
1570 dequeued = FALSE;
1571
1572 }
1573 else {
1574
1575 tail = *tail_id_p;
1576 if ( head == tail ) { /* only one thread on the queue */
1577
1578#ifdef DEBUG_QUEUING_LOCKS
1579 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580#endif
1581 KMP_DEBUG_ASSERT( head > 0 );
1582
1583 /* try (h,h)->(-1,0) */
1584 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1585 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1586#ifdef DEBUG_QUEUING_LOCKS
1587 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1588#endif
1589
1590 }
1591 else {
1592 volatile kmp_int32 *waiting_id_p;
1593 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1594 KMP_DEBUG_ASSERT( head_thr != NULL );
1595 waiting_id_p = & head_thr->th.th_next_waiting;
1596
1597 /* Does this require synchronous reads? */
1598#ifdef DEBUG_QUEUING_LOCKS
1599 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1600#endif
1601 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1602
1603 /* try (h,t)->(h',t) or (t,t) */
1604
1605 KMP_MB();
1606 /* make sure enqueuing thread has time to update next waiting thread field */
Jonathan Peyton32a1ea12016-01-04 23:20:26 +00001607 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001608#ifdef DEBUG_QUEUING_LOCKS
1609 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1610#endif
1611 dequeued = TRUE;
1612 }
1613 }
1614
1615 if ( dequeued ) {
1616 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617 KMP_DEBUG_ASSERT( head_thr != NULL );
1618
1619 /* Does this require synchronous reads? */
1620#ifdef DEBUG_QUEUING_LOCKS
1621 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1622#endif
1623 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1624
1625 /* For clean code only.
1626 * Thread not released until next statement prevents race with acquire code.
1627 */
1628 head_thr->th.th_next_waiting = 0;
1629#ifdef DEBUG_QUEUING_LOCKS
1630 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1631#endif
1632
1633 KMP_MB();
1634 /* reset spin value */
1635 head_thr->th.th_spin_here = FALSE;
1636
1637 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1638 lck, gtid ));
1639#ifdef DEBUG_QUEUING_LOCKS
1640 TRACE_LOCK( gtid+1, "rel exit 2" );
1641#endif
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001642 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001643 }
1644 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1645
1646#ifdef DEBUG_QUEUING_LOCKS
1647 TRACE_LOCK( gtid+1, "rel retry" );
1648#endif
1649
1650 } /* while */
1651 KMP_ASSERT2( 0, "should not get here" );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001652 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001653}
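
// Minimal sketch (not part of the build) of the hand-off wait in the release
// path above: the releasing thread cannot advance head until the enqueuing
// thread has published th_next_waiting, so it spins until that field becomes
// non-zero.  std::atomic stands in for the KMP_WAIT_YIELD(..., KMP_NEQ, ...)
// call, and wait_until_nonzero is an illustrative name, not a runtime routine.
#if 0
#include <atomic>
#include <cstdint>

static int32_t wait_until_nonzero(std::atomic<int32_t> *next_waiting) {
    int32_t v;
    while ((v = next_waiting->load(std::memory_order_acquire)) == 0) {
        // the real code yields and backs off here (KMP_WAIT_YIELD does this)
    }
    return v;   // gtid+1 of the next waiter, which becomes the new queue head
}
#endif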
1654
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001655static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001656__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1657 kmp_int32 gtid )
1658{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001659 char const * const func = "omp_unset_lock";
1660 KMP_MB(); /* in case another processor initialized lock */
1661 if ( lck->lk.initialized != lck ) {
1662 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001663 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001664 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1665 KMP_FATAL( LockNestableUsedAsSimple, func );
1666 }
1667 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1668 KMP_FATAL( LockUnsettingFree, func );
1669 }
1670 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1671 KMP_FATAL( LockUnsettingSetByAnother, func );
1672 }
1673 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001674 return __kmp_release_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675}
1676
1677void
1678__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1679{
1680 lck->lk.location = NULL;
1681 lck->lk.head_id = 0;
1682 lck->lk.tail_id = 0;
1683 lck->lk.next_ticket = 0;
1684 lck->lk.now_serving = 0;
1685 lck->lk.owner_id = 0; // no thread owns the lock.
1686 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1687 lck->lk.initialized = lck;
1688
1689 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1690}
1691
1692static void
1693__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1694{
1695 __kmp_init_queuing_lock( lck );
1696}
1697
1698void
1699__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1700{
1701 lck->lk.initialized = NULL;
1702 lck->lk.location = NULL;
1703 lck->lk.head_id = 0;
1704 lck->lk.tail_id = 0;
1705 lck->lk.next_ticket = 0;
1706 lck->lk.now_serving = 0;
1707 lck->lk.owner_id = 0;
1708 lck->lk.depth_locked = -1;
1709}
1710
1711static void
1712__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1713{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001714 char const * const func = "omp_destroy_lock";
1715 if ( lck->lk.initialized != lck ) {
1716 KMP_FATAL( LockIsUninitialized, func );
1717 }
1718 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1719 KMP_FATAL( LockNestableUsedAsSimple, func );
1720 }
1721 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1722 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001723 }
1724 __kmp_destroy_queuing_lock( lck );
1725}
1726
1727
1728//
1729// nested queuing locks
1730//
1731
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001732int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001733__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1734{
1735 KMP_DEBUG_ASSERT( gtid >= 0 );
1736
1737 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1738 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001739 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001740 }
1741 else {
1742 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001743 ANNOTATE_QUEUING_ACQUIRED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001744 KMP_MB();
1745 lck->lk.depth_locked = 1;
1746 KMP_MB();
1747 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001748 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001749 }
1750}
1751
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001752static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001753__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1754{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001755 char const * const func = "omp_set_nest_lock";
1756 if ( lck->lk.initialized != lck ) {
1757 KMP_FATAL( LockIsUninitialized, func );
1758 }
1759 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1760 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001761 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00001762 return __kmp_acquire_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001763}
1764
1765int
1766__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1767{
1768 int retval;
1769
1770 KMP_DEBUG_ASSERT( gtid >= 0 );
1771
1772 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1773 retval = ++lck->lk.depth_locked;
1774 }
1775 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1776 retval = 0;
1777 }
1778 else {
1779 KMP_MB();
1780 retval = lck->lk.depth_locked = 1;
1781 KMP_MB();
1782 lck->lk.owner_id = gtid + 1;
1783 }
1784 return retval;
1785}
1786
1787static int
1788__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1789 kmp_int32 gtid )
1790{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001791 char const * const func = "omp_test_nest_lock";
1792 if ( lck->lk.initialized != lck ) {
1793 KMP_FATAL( LockIsUninitialized, func );
1794 }
1795 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001797 }
1798 return __kmp_test_nested_queuing_lock( lck, gtid );
1799}
1800
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001801int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001802__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1803{
1804 KMP_DEBUG_ASSERT( gtid >= 0 );
1805
1806 KMP_MB();
1807 if ( --(lck->lk.depth_locked) == 0 ) {
1808 KMP_MB();
1809 lck->lk.owner_id = 0;
1810 __kmp_release_queuing_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001811 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001813 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001814}
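
// Minimal sketch (not part of the build) of the re-entrancy bookkeeping that
// all nested lock kinds in this file share: the owner bumps depth_locked on
// re-acquisition, and only the release that brings it back to zero touches
// the underlying simple lock.  toy_nested_release is an illustrative name and
// base_release() a hypothetical stand-in for the simple-lock release routine.
#if 0
static int toy_nested_release(kmp_int32 *depth_locked, kmp_int32 *owner_id,
                              kmp_int32 gtid) {
    if (--(*depth_locked) == 0) {
        *owner_id = 0;
        // base_release(lck, gtid);   // e.g. __kmp_release_queuing_lock
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;       // still held by this thread
}
#endif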
1815
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001816static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001817__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1818{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001819 char const * const func = "omp_unset_nest_lock";
1820 KMP_MB(); /* in case another processor initialized lock */
1821 if ( lck->lk.initialized != lck ) {
1822 KMP_FATAL( LockIsUninitialized, func );
1823 }
1824 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1825 KMP_FATAL( LockSimpleUsedAsNestable, func );
1826 }
1827 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1828 KMP_FATAL( LockUnsettingFree, func );
1829 }
1830 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1831 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001832 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001833 return __kmp_release_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001834}
1835
1836void
1837__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1838{
1839 __kmp_init_queuing_lock( lck );
1840 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1841}
1842
1843static void
1844__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1845{
1846 __kmp_init_nested_queuing_lock( lck );
1847}
1848
1849void
1850__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1851{
1852 __kmp_destroy_queuing_lock( lck );
1853 lck->lk.depth_locked = 0;
1854}
1855
1856static void
1857__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1858{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001859 char const * const func = "omp_destroy_nest_lock";
1860 if ( lck->lk.initialized != lck ) {
1861 KMP_FATAL( LockIsUninitialized, func );
1862 }
1863 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1864 KMP_FATAL( LockSimpleUsedAsNestable, func );
1865 }
1866 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1867 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001868 }
1869 __kmp_destroy_nested_queuing_lock( lck );
1870}
1871
1872
1873//
1874// access functions to fields which don't exist for all lock kinds.
1875//
1876
1877static int
1878__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1879{
1880 return lck == lck->lk.initialized;
1881}
1882
1883static const ident_t *
1884__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1885{
1886 return lck->lk.location;
1887}
1888
1889static void
1890__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1891{
1892 lck->lk.location = loc;
1893}
1894
1895static kmp_lock_flags_t
1896__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1897{
1898 return lck->lk.flags;
1899}
1900
1901static void
1902__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1903{
1904 lck->lk.flags = flags;
1905}
1906
1907#if KMP_USE_ADAPTIVE_LOCKS
1908
1909/*
1910 RTM Adaptive locks
1911*/
1912
Jonathan Peytona03533d2015-12-11 21:49:08 +00001913#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1914
1915#include <immintrin.h>
1916#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1917
1918#else
Jim Cownie5e8470a2013-09-27 10:38:44 +00001919
1920// Values from the status register after failed speculation.
1921#define _XBEGIN_STARTED (~0u)
1922#define _XABORT_EXPLICIT (1 << 0)
1923#define _XABORT_RETRY (1 << 1)
1924#define _XABORT_CONFLICT (1 << 2)
1925#define _XABORT_CAPACITY (1 << 3)
1926#define _XABORT_DEBUG (1 << 4)
1927#define _XABORT_NESTED (1 << 5)
1928#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1929
1930// Aborts for which it's worth trying again immediately
1931#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1932
1933#define STRINGIZE_INTERNAL(arg) #arg
1934#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1935
1936// Access to RTM instructions
1937
1938/*
1939 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1940 This is the same definition as the compiler intrinsic that will be supported at some point.
1941*/
1942static __inline int _xbegin()
1943{
1944 int res = -1;
1945
1946#if KMP_OS_WINDOWS
1947#if KMP_ARCH_X86_64
1948 _asm {
1949 _emit 0xC7
1950 _emit 0xF8
1951 _emit 2
1952 _emit 0
1953 _emit 0
1954 _emit 0
1955 jmp L2
1956 mov res, eax
1957 L2:
1958 }
1959#else /* IA32 */
1960 _asm {
1961 _emit 0xC7
1962 _emit 0xF8
1963 _emit 2
1964 _emit 0
1965 _emit 0
1966 _emit 0
1967 jmp L2
1968 mov res, eax
1969 L2:
1970 }
1971#endif // KMP_ARCH_X86_64
1972#else
1973 /* Note that %eax must be noted as killed (clobbered), because
1974 * the XSR is returned in %eax(%rax) on abort. Other register
1975 * values are restored, so don't need to be killed.
1976 *
1977 * We must also mark 'res' as an input and an output, since otherwise
1978 * 'res=-1' may be dropped as being dead, whereas we do need the
1979 * assignment on the successful (i.e., non-abort) path.
1980 */
1981 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1982 " .long 1f-1b-6\n"
1983 " jmp 2f\n"
1984 "1: movl %%eax,%0\n"
1985 "2:"
1986 :"+r"(res)::"memory","%eax");
1987#endif // KMP_OS_WINDOWS
1988 return res;
1989}
1990
1991/*
1992 Transaction end
1993*/
1994static __inline void _xend()
1995{
1996#if KMP_OS_WINDOWS
1997 __asm {
1998 _emit 0x0f
1999 _emit 0x01
2000 _emit 0xd5
2001 }
2002#else
2003 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2004#endif
2005}
2006
2007/*
2008  This is a macro; the argument must be a single-byte constant which
2009 can be evaluated by the inline assembler, since it is emitted as a
2010 byte into the assembly code.
2011*/
2012#if KMP_OS_WINDOWS
2013#define _xabort(ARG) \
2014 _asm _emit 0xc6 \
2015 _asm _emit 0xf8 \
2016 _asm _emit ARG
2017#else
2018#define _xabort(ARG) \
2019 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2020#endif
2021
Jonathan Peytona03533d2015-12-11 21:49:08 +00002022#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
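
// Minimal sketch (not part of the build) of the usual RTM usage pattern that
// the adaptive-lock code below follows: retry _xbegin() only on "soft"
// aborts, and let the caller fall back to the real lock otherwise.
// rtm_try_speculate is an illustrative name, not a runtime routine.
#if 0
static bool rtm_try_speculate(int max_soft_retries) {
    for (int retries = max_soft_retries; retries >= 0; --retries) {
        kmp_uint32 status = _xbegin();
        if (status == _XBEGIN_STARTED)
            return true;              // inside the transaction; _xend() at release
        if (!(status & SOFT_ABORT_MASK))
            break;                    // hard abort: retrying is pointless
    }
    return false;                     // caller takes the non-speculative path
}
#endif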
2023
Jim Cownie5e8470a2013-09-27 10:38:44 +00002024//
2025// Statistics are collected for testing purposes
2026//
2027#if KMP_DEBUG_ADAPTIVE_LOCKS
2028
2029// We accumulate speculative lock statistics when the lock is destroyed.
2030// We keep locks that haven't been destroyed in the liveLocks list
2031// so that we can grab their statistics too.
2032static kmp_adaptive_lock_statistics_t destroyedStats;
2033
2034// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002035static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036
2037// A lock so we can safely update the list of locks.
2038static kmp_bootstrap_lock_t chain_lock;
2039
2040// Initialize the list of stats.
2041void
2042__kmp_init_speculative_stats()
2043{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002044 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002045
2046 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2047 lck->stats.next = lck;
2048 lck->stats.prev = lck;
2049
2050 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052
2053 __kmp_init_bootstrap_lock( &chain_lock );
2054
2055}
2056
2057// Insert the lock into the circular list
2058static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002059__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002060{
2061 __kmp_acquire_bootstrap_lock( &chain_lock );
2062
2063 lck->stats.next = liveLocks.stats.next;
2064 lck->stats.prev = &liveLocks;
2065
2066 liveLocks.stats.next = lck;
2067 lck->stats.next->stats.prev = lck;
2068
2069 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071
2072 __kmp_release_bootstrap_lock( &chain_lock );
2073}
2074
2075static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002076__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002077{
2078 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2079 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2080
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002081 kmp_adaptive_lock_info_t * n = lck->stats.next;
2082 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002083
2084 n->stats.prev = p;
2085 p->stats.next = n;
2086}
2087
2088static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002089__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002090{
2091 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2092 __kmp_remember_lock( lck );
2093}
2094
2095static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002096__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002097{
2098 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2099
2100 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2101 t->successfulSpeculations += s->successfulSpeculations;
2102 t->hardFailedSpeculations += s->hardFailedSpeculations;
2103 t->softFailedSpeculations += s->softFailedSpeculations;
2104 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2105 t->lemmingYields += s->lemmingYields;
2106}
2107
2108static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002109__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002110{
2111 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2112
2113 __kmp_acquire_bootstrap_lock( &chain_lock );
2114
2115 __kmp_add_stats( &destroyedStats, lck );
2116 __kmp_forget_lock( lck );
2117
2118 __kmp_release_bootstrap_lock( &chain_lock );
2119}
2120
2121static float
2122percent (kmp_uint32 count, kmp_uint32 total)
2123{
2124    return (total == 0) ? 0.0 : (100.0 * count) / total;
2125}
2126
2127static
2128FILE * __kmp_open_stats_file()
2129{
2130 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2131 return stdout;
2132
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002133 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002134 char buffer[buffLen];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002135 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002136 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002137 FILE * result = fopen(&buffer[0], "w");
2138
2139 // Maybe we should issue a warning here...
2140 return result ? result : stdout;
2141}
2142
2143void
2144__kmp_print_speculative_stats()
2145{
2146 if (__kmp_user_lock_kind != lk_adaptive)
2147 return;
2148
2149 FILE * statsFile = __kmp_open_stats_file();
2150
2151 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153
2154 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2155 __kmp_add_stats( &total, lck );
2156 }
2157 kmp_adaptive_lock_statistics_t *t = &total;
2158 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2159 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2160 t->softFailedSpeculations;
2161
2162 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2163 fprintf ( statsFile, " Lock parameters: \n"
2164 " max_soft_retries : %10d\n"
2165 " max_badness : %10d\n",
2166 __kmp_adaptive_backoff_params.max_soft_retries,
2167 __kmp_adaptive_backoff_params.max_badness);
2168 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2169 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2170 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2171 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2172 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2173 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2174 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2175
2176 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2177 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2178 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2179 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2180 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2181 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2182 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2183
2184 if (statsFile != stdout)
2185 fclose( statsFile );
2186}
2187
2188# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2189#else
2190# define KMP_INC_STAT(lck,stat)
2191
2192#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2193
2194static inline bool
2195__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2196{
2197 // It is enough to check that the head_id is zero.
2198 // We don't also need to check the tail.
2199 bool res = lck->lk.head_id == 0;
2200
2201 // We need a fence here, since we must ensure that no memory operations
2202 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002203#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002204 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002205#else
2206 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002207#endif
2208
2209 return res;
2210}
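
// Rough sketch (not part of the build) of the check-plus-fence above using
// C++11 atomics instead of the compiler-specific intrinsics; the full fence
// keeps reads issued later in the speculative region from floating above the
// head_id load.  toy_is_unlocked is an illustrative name only.
#if 0
#include <atomic>

static bool toy_is_unlocked(volatile kmp_int32 *head_id_p) {
    bool res = (*head_id_p == 0);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    return res;
}
#endif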
2211
2212// Functions for manipulating the badness
2213static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002214__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002215{
2216 // Reset the badness to zero so we eagerly try to speculate again
2217 lck->lk.adaptive.badness = 0;
2218 KMP_INC_STAT(lck,successfulSpeculations);
2219}
2220
2221// Create a bit mask with one more set bit.
2222static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002223__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002224{
2225 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2226 if ( newBadness > lck->lk.adaptive.max_badness) {
2227 return;
2228 } else {
2229 lck->lk.adaptive.badness = newBadness;
2230 }
2231}
2232
2233// Check whether speculation should be attempted.
2234static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002235__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002236{
2237 kmp_uint32 badness = lck->lk.adaptive.badness;
2238 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2239 int res = (attempts & badness) == 0;
2240 return res;
2241}
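
// Minimal sketch (not part of the build) of how the badness mask throttles
// speculation: __kmp_step_badness keeps badness of the form 2^k - 1, so the
// test above passes on every 2^k-th acquire attempt.  toy_should_speculate
// is an illustrative name only.
#if 0
static bool toy_should_speculate(kmp_uint32 attempts, kmp_uint32 badness) {
    // badness == 0 -> speculate every time; badness == 7 -> every 8th attempt
    return (attempts & badness) == 0;
}
#endif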
2242
2243// Attempt to acquire only the speculative lock.
2244// Does not back off to the non-speculative lock.
2245//
2246static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002247__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002248{
2249 int retries = lck->lk.adaptive.max_soft_retries;
2250
2251 // We don't explicitly count the start of speculation, rather we record
2252 // the results (success, hard fail, soft fail). The sum of all of those
2253 // is the total number of times we started speculation since all
2254    // speculations must end in one of those ways.
2255 do
2256 {
2257 kmp_uint32 status = _xbegin();
2258 // Switch this in to disable actual speculation but exercise
2259 // at least some of the rest of the code. Useful for debugging...
2260 // kmp_uint32 status = _XABORT_NESTED;
2261
2262 if (status == _XBEGIN_STARTED )
2263 { /* We have successfully started speculation
2264 * Check that no-one acquired the lock for real between when we last looked
2265 * and now. This also gets the lock cache line into our read-set,
2266 * which we need so that we'll abort if anyone later claims it for real.
2267 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002268 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002269 {
2270 // Lock is now visibly acquired, so someone beat us to it.
2271 // Abort the transaction so we'll restart from _xbegin with the
2272 // failure status.
Jonathan Peytona03533d2015-12-11 21:49:08 +00002273 _xabort(0x01);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002274 KMP_ASSERT2( 0, "should not get here" );
2275 }
2276 return 1; // Lock has been acquired (speculatively)
2277 } else {
2278 // We have aborted, update the statistics
2279 if ( status & SOFT_ABORT_MASK)
2280 {
2281 KMP_INC_STAT(lck,softFailedSpeculations);
2282 // and loop round to retry.
2283 }
2284 else
2285 {
2286 KMP_INC_STAT(lck,hardFailedSpeculations);
2287 // Give up if we had a hard failure.
2288 break;
2289 }
2290 }
2291 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2292
2293 // Either we had a hard failure or we didn't succeed softly after
2294 // the full set of attempts, so back off the badness.
2295 __kmp_step_badness( lck );
2296 return 0;
2297}
2298
2299// Attempt to acquire the speculative lock, or back off to the non-speculative one
2300// if the speculative lock cannot be acquired.
2301// We can succeed speculatively, non-speculatively, or fail.
2302static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002303__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002304{
2305 // First try to acquire the lock speculatively
2306 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2307 return 1;
2308
2309 // Speculative acquisition failed, so try to acquire it non-speculatively.
2310 // Count the non-speculative acquire attempt
2311 lck->lk.adaptive.acquire_attempts++;
2312
2313 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002314 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002315 {
2316 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2317 return 1; // Lock is acquired (non-speculatively)
2318 }
2319 else
2320 {
2321 return 0; // Failed to acquire the lock, it's already visibly locked.
2322 }
2323}
2324
2325static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002326__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002327{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002328 char const * const func = "omp_test_lock";
2329 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2330 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002331 }
2332
2333 int retval = __kmp_test_adaptive_lock( lck, gtid );
2334
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002335 if ( retval ) {
2336 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002337 }
2338 return retval;
2339}
2340
2341// Block until we can acquire a speculative, adaptive lock.
2342// We check whether we should be trying to speculate.
2343// If we should be, we check the real lock to see if it is free,
2344// and, if not, pause without attempting to acquire it until it is.
2345// Then we try the speculative acquire.
2346// This means that although we suffer from lemmings a little
2347// (because we can't acquire the lock speculatively until
2348// the queue of waiting threads has cleared), we don't get into a
2349// state where we can never acquire the lock speculatively (because we
2350// force the queue to clear by preventing new arrivals from entering the
2351// queue).
2352// This does mean that when we're trying to break lemmings, the lock
2353// is no longer fair. However OpenMP makes no guarantee that its
2354// locks are fair, so this isn't a real problem.
2355static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002356__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002357{
2358 if ( __kmp_should_speculate( lck, gtid ) )
2359 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002360 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361 {
2362 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2363 return;
2364 // We tried speculation and failed, so give up.
2365 }
2366 else
2367 {
2368 // We can't try speculation until the lock is free, so we
2369// pause here (without suspending on the queuing lock)
2370// to allow it to drain, then try again.
2371// All other threads will also see the same result for
2372// __kmp_should_speculate, so they will be doing the same if they
2373 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002374 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375 {
2376 KMP_INC_STAT(lck,lemmingYields);
2377 __kmp_yield (TRUE);
2378 }
2379
2380 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2381 return;
2382 }
2383 }
2384
2385 // Speculative acquisition failed, so acquire it non-speculatively.
2386 // Count the non-speculative acquire attempt
2387 lck->lk.adaptive.acquire_attempts++;
2388
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002389 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002390 // We have acquired the base lock, so count that.
2391 KMP_INC_STAT(lck,nonSpeculativeAcquires );
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002392 ANNOTATE_QUEUING_ACQUIRED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002393}
2394
2395static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002396__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002397{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002398 char const * const func = "omp_set_lock";
2399 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2400 KMP_FATAL( LockIsUninitialized, func );
2401 }
2402 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2403 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 }
2405
2406 __kmp_acquire_adaptive_lock( lck, gtid );
2407
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002408 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002409}
2410
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002411static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002412__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002413{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002414 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002415 { // If the lock doesn't look claimed we must be speculating.
2416 // (Or the user's code is buggy and they're releasing without locking;
2417 // if we had XTEST we'd be able to check that case...)
2418 _xend(); // Exit speculation
2419 __kmp_update_badness_after_success( lck );
2420 }
2421 else
2422 { // Since the lock *is* visibly locked we're not speculating,
2423 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002424 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002426 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002427}
2428
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002429static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002430__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002431{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002432 char const * const func = "omp_unset_lock";
2433 KMP_MB(); /* in case another processor initialized lock */
2434 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2435 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002436 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002437 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2438 KMP_FATAL( LockUnsettingFree, func );
2439 }
2440 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2441 KMP_FATAL( LockUnsettingSetByAnother, func );
2442 }
2443 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002444 __kmp_release_adaptive_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002445 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002446}
2447
2448static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002449__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002450{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002451 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002452 lck->lk.adaptive.badness = 0;
2453 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2454 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2455 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2456#if KMP_DEBUG_ADAPTIVE_LOCKS
2457 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2458#endif
2459 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2460}
2461
2462static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002463__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002464{
2465 __kmp_init_adaptive_lock( lck );
2466}
2467
2468static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002469__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002470{
2471#if KMP_DEBUG_ADAPTIVE_LOCKS
2472 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2473#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002474 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002475 // Nothing needed for the speculative part.
2476}
2477
2478static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002479__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002480{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002481 char const * const func = "omp_destroy_lock";
2482 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2483 KMP_FATAL( LockIsUninitialized, func );
2484 }
2485 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2486 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002487 }
2488 __kmp_destroy_adaptive_lock( lck );
2489}
2490
2491
2492#endif // KMP_USE_ADAPTIVE_LOCKS
2493
2494
2495/* ------------------------------------------------------------------------ */
2496/* DRDPA ticket locks */
2497/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2498
2499static kmp_int32
2500__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2501{
2502 return TCR_4( lck->lk.owner_id ) - 1;
2503}
2504
2505static inline bool
2506__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2507{
2508 return lck->lk.depth_locked != -1;
2509}
2510
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002511__forceinline static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002512__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2513{
2514 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2515 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2516 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2517 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2518 TCR_PTR(lck->lk.polls); // volatile load
2519
2520#ifdef USE_LOCK_PROFILE
2521 if (TCR_8(polls[ticket & mask].poll) != ticket)
2522 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2523 /* else __kmp_printf( "." );*/
2524#endif /* USE_LOCK_PROFILE */
2525
2526 //
2527 // Now spin-wait, but reload the polls pointer and mask, in case the
2528 // polling area has been reconfigured. Unless it is reconfigured, the
2529 // reloads stay in L1 cache and are cheap.
2530 //
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002531 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
Jim Cownie5e8470a2013-09-27 10:38:44 +00002532 //
2533 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2534 // and poll to be re-read every spin iteration.
2535 //
2536 kmp_uint32 spins;
2537
2538 KMP_FSYNC_PREPARE(lck);
2539 KMP_INIT_YIELD(spins);
Paul Osmialowski52bef532016-05-07 00:00:00 +00002540 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002541 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002542 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002543 // CPU Pause is in the macros for yield.
2544 //
2545 KMP_YIELD(TCR_4(__kmp_nth)
2546 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2547 KMP_YIELD_SPIN(spins);
2548
2549 // Re-read the mask and the poll pointer from the lock structure.
2550 //
2551 // Make certain that "mask" is read before "polls" !!!
2552 //
2553        // If another thread reconfigures the polling area and updates
2554        // those values, and we get the new value of mask and the old polls
2555 // pointer, we could access memory beyond the end of the old polling
2556 // area.
2557 //
2558 mask = TCR_8(lck->lk.mask); // volatile load
2559 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2560 TCR_PTR(lck->lk.polls); // volatile load
2561 }
2562
2563 //
2564 // Critical section starts here
2565 //
2566 KMP_FSYNC_ACQUIRED(lck);
2567 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2568 ticket, lck));
2569 lck->lk.now_serving = ticket; // non-volatile store
2570
2571 //
2572 // Deallocate a garbage polling area if we know that we are the last
2573 // thread that could possibly access it.
2574 //
2575 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2576 // ticket.
2577 //
2578 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2579 __kmp_free((void *)lck->lk.old_polls);
2580 lck->lk.old_polls = NULL;
2581 lck->lk.cleanup_ticket = 0;
2582 }
2583
2584 //
2585 // Check to see if we should reconfigure the polling area.
2586 // If there is still a garbage polling area to be deallocated from a
2587 // previous reconfiguration, let a later thread reconfigure it.
2588 //
2589 if (lck->lk.old_polls == NULL) {
2590 bool reconfigure = false;
2591 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2592 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2593
2594 if (TCR_4(__kmp_nth)
2595 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2596 //
2597 // We are in oversubscription mode. Contract the polling area
2598 // down to a single location, if that hasn't been done already.
2599 //
2600 if (num_polls > 1) {
2601 reconfigure = true;
2602 num_polls = TCR_4(lck->lk.num_polls);
2603 mask = 0;
2604 num_polls = 1;
2605 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2606 __kmp_allocate(num_polls * sizeof(*polls));
2607 polls[0].poll = ticket;
2608 }
2609 }
2610 else {
2611 //
2612 // We are in under/fully subscribed mode. Check the number of
2613 // threads waiting on the lock. The size of the polling area
2614 // should be at least the number of threads waiting.
2615 //
2616 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2617 if (num_waiting > num_polls) {
2618 kmp_uint32 old_num_polls = num_polls;
2619 reconfigure = true;
2620 do {
2621 mask = (mask << 1) | 1;
2622 num_polls *= 2;
2623 } while (num_polls <= num_waiting);
2624
2625 //
2626 // Allocate the new polling area, and copy the relevant portion
2627 // of the old polling area to the new area. __kmp_allocate()
2628 // zeroes the memory it allocates, and most of the old area is
2629 // just zero padding, so we only copy the release counters.
2630 //
2631 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2632 __kmp_allocate(num_polls * sizeof(*polls));
2633 kmp_uint32 i;
2634 for (i = 0; i < old_num_polls; i++) {
2635 polls[i].poll = old_polls[i].poll;
2636 }
2637 }
2638 }
2639
2640 if (reconfigure) {
2641 //
2642 // Now write the updated fields back to the lock structure.
2643 //
2644 // Make certain that "polls" is written before "mask" !!!
2645 //
2646 // If another thread picks up the new value of mask and the old
2647        // polls pointer, it could access memory beyond the end of the
2648 // old polling area.
2649 //
2650 // On x86, we need memory fences.
2651 //
2652 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2653 ticket, lck, num_polls));
2654
2655 lck->lk.old_polls = old_polls; // non-volatile store
2656 lck->lk.polls = polls; // volatile store
2657
2658 KMP_MB();
2659
2660 lck->lk.num_polls = num_polls; // non-volatile store
2661 lck->lk.mask = mask; // volatile store
2662
2663 KMP_MB();
2664
2665 //
2666 // Only after the new polling area and mask have been flushed
2667 // to main memory can we update the cleanup ticket field.
2668 //
2669 // volatile load / non-volatile store
2670 //
2671 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2672 }
2673 }
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002674 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002675}
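
// Minimal sketch (not part of the build) of the polling-area sizing rule used
// in the reconfiguration above: num_polls stays a power of two and
// mask == num_polls - 1, so each ticket holder spins on its own slot
// polls[ticket & mask].  toy_grow_polls is an illustrative name only.
#if 0
static void toy_grow_polls(kmp_uint64 num_waiting, kmp_uint64 *mask,
                           kmp_uint32 *num_polls) {
    while (*num_polls <= num_waiting) {
        *mask = (*mask << 1) | 1;    // 0 -> 1 -> 3 -> 7 -> ...
        *num_polls *= 2;             // 1 -> 2 -> 4 -> 8 -> ...
    }
}
#endif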
2676
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002677int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002678__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679{
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002680 int retval = __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2681 ANNOTATE_DRDPA_ACQUIRED(lck);
2682 return retval;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002683}
2684
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002685static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002686__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2687{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002688 char const * const func = "omp_set_lock";
2689 if ( lck->lk.initialized != lck ) {
2690 KMP_FATAL( LockIsUninitialized, func );
2691 }
2692 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2693 KMP_FATAL( LockNestableUsedAsSimple, func );
2694 }
2695 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2696 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002697 }
2698
2699 __kmp_acquire_drdpa_lock( lck, gtid );
2700
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002701 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002702 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002703}
2704
2705int
2706__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2707{
2708 //
2709 // First get a ticket, then read the polls pointer and the mask.
2710 // The polls pointer must be read before the mask!!! (See above)
2711 //
2712 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2713 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2714 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2715 TCR_PTR(lck->lk.polls); // volatile load
2716 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2717 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2718 kmp_uint64 next_ticket = ticket + 1;
2719 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2720 ticket, next_ticket)) {
2721 KMP_FSYNC_ACQUIRED(lck);
2722 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2723 ticket, lck));
2724 lck->lk.now_serving = ticket; // non-volatile store
2725
2726 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002727 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002728 // we would want to reconfigure the polling area. We might
2729 // have the cleanup ticket value (which says that it is now
2730 // safe to deallocate old_polls), but we'll let a later thread
2731 // which calls __kmp_acquire_lock do that - this routine
2732 // isn't supposed to block, and we would risk blocks if we
2733 // called __kmp_free() to do the deallocation.
2734 //
2735 return TRUE;
2736 }
2737 }
2738 return FALSE;
2739}
2740
2741static int
2742__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2743{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002744 char const * const func = "omp_test_lock";
2745 if ( lck->lk.initialized != lck ) {
2746 KMP_FATAL( LockIsUninitialized, func );
2747 }
2748 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2749 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002750 }
2751
2752 int retval = __kmp_test_drdpa_lock( lck, gtid );
2753
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002754 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002755 lck->lk.owner_id = gtid + 1;
2756 }
2757 return retval;
2758}
2759
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002760int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002761__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2762{
2763 //
2764 // Read the ticket value from the lock data struct, then the polls
2765 // pointer and the mask. The polls pointer must be read before the
2766 // mask!!! (See above)
2767 //
2768 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2769 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2770 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2771 TCR_PTR(lck->lk.polls); // volatile load
2772 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2773 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2774 ticket - 1, lck));
2775 KMP_FSYNC_RELEASING(lck);
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002776 ANNOTATE_DRDPA_RELEASED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002777 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002778 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002779}
2780
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002781static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002782__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2783{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002784 char const * const func = "omp_unset_lock";
2785 KMP_MB(); /* in case another processor initialized lock */
2786 if ( lck->lk.initialized != lck ) {
2787 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002788 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002789 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2790 KMP_FATAL( LockNestableUsedAsSimple, func );
2791 }
2792 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2793 KMP_FATAL( LockUnsettingFree, func );
2794 }
2795 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2796 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2797 KMP_FATAL( LockUnsettingSetByAnother, func );
2798 }
2799 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002800 return __kmp_release_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002801}
2802
2803void
2804__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2805{
2806 lck->lk.location = NULL;
2807 lck->lk.mask = 0;
2808 lck->lk.num_polls = 1;
2809 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2810 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2811 lck->lk.cleanup_ticket = 0;
2812 lck->lk.old_polls = NULL;
2813 lck->lk.next_ticket = 0;
2814 lck->lk.now_serving = 0;
2815 lck->lk.owner_id = 0; // no thread owns the lock.
2816 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2817 lck->lk.initialized = lck;
2818
2819 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2820}
2821
2822static void
2823__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2824{
2825 __kmp_init_drdpa_lock( lck );
2826}
2827
2828void
2829__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2830{
2831 lck->lk.initialized = NULL;
2832 lck->lk.location = NULL;
2833 if (lck->lk.polls != NULL) {
2834 __kmp_free((void *)lck->lk.polls);
2835 lck->lk.polls = NULL;
2836 }
2837 if (lck->lk.old_polls != NULL) {
2838 __kmp_free((void *)lck->lk.old_polls);
2839 lck->lk.old_polls = NULL;
2840 }
2841 lck->lk.mask = 0;
2842 lck->lk.num_polls = 0;
2843 lck->lk.cleanup_ticket = 0;
2844 lck->lk.next_ticket = 0;
2845 lck->lk.now_serving = 0;
2846 lck->lk.owner_id = 0;
2847 lck->lk.depth_locked = -1;
2848}
2849
2850static void
2851__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2852{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002853 char const * const func = "omp_destroy_lock";
2854 if ( lck->lk.initialized != lck ) {
2855 KMP_FATAL( LockIsUninitialized, func );
2856 }
2857 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2858 KMP_FATAL( LockNestableUsedAsSimple, func );
2859 }
2860 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2861 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002862 }
2863 __kmp_destroy_drdpa_lock( lck );
2864}
2865
2866
2867//
2868// nested drdpa ticket locks
2869//
2870
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002871int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002872__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2873{
2874 KMP_DEBUG_ASSERT( gtid >= 0 );
2875
2876 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2877 lck->lk.depth_locked += 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002878 return KMP_LOCK_ACQUIRED_NEXT;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002879 }
2880 else {
2881 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00002882 ANNOTATE_DRDPA_ACQUIRED(lck);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002883 KMP_MB();
2884 lck->lk.depth_locked = 1;
2885 KMP_MB();
2886 lck->lk.owner_id = gtid + 1;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00002887 return KMP_LOCK_ACQUIRED_FIRST;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002888 }
2889}
2890
2891static void
2892__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2893{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002894 char const * const func = "omp_set_nest_lock";
2895 if ( lck->lk.initialized != lck ) {
2896 KMP_FATAL( LockIsUninitialized, func );
2897 }
2898 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2899 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002900 }
2901 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2902}
2903
2904int
2905__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2906{
2907 int retval;
2908
2909 KMP_DEBUG_ASSERT( gtid >= 0 );
2910
2911 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2912 retval = ++lck->lk.depth_locked;
2913 }
2914 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2915 retval = 0;
2916 }
2917 else {
2918 KMP_MB();
2919 retval = lck->lk.depth_locked = 1;
2920 KMP_MB();
2921 lck->lk.owner_id = gtid + 1;
2922 }
2923 return retval;
2924}
2925
2926static int
2927__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2928{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002929 char const * const func = "omp_test_nest_lock";
2930 if ( lck->lk.initialized != lck ) {
2931 KMP_FATAL( LockIsUninitialized, func );
2932 }
2933 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2934 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002935 }
2936 return __kmp_test_nested_drdpa_lock( lck, gtid );
2937}
2938
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002939int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002940__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2941{
2942 KMP_DEBUG_ASSERT( gtid >= 0 );
2943
2944 KMP_MB();
2945 if ( --(lck->lk.depth_locked) == 0 ) {
2946 KMP_MB();
2947 lck->lk.owner_id = 0;
2948 __kmp_release_drdpa_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002949 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002950 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002951 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002952}
2953
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002954static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002955__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2956{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002957 char const * const func = "omp_unset_nest_lock";
2958 KMP_MB(); /* in case another processor initialized lock */
2959 if ( lck->lk.initialized != lck ) {
2960 KMP_FATAL( LockIsUninitialized, func );
2961 }
2962 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2963 KMP_FATAL( LockSimpleUsedAsNestable, func );
2964 }
2965 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2966 KMP_FATAL( LockUnsettingFree, func );
2967 }
2968 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2969 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002970 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002971 return __kmp_release_nested_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002972}
2973
2974void
2975__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2976{
2977 __kmp_init_drdpa_lock( lck );
2978 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2979}
2980
2981static void
2982__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2983{
2984 __kmp_init_nested_drdpa_lock( lck );
2985}
2986
2987void
2988__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2989{
2990 __kmp_destroy_drdpa_lock( lck );
2991 lck->lk.depth_locked = 0;
2992}
2993
2994static void
2995__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2996{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002997 char const * const func = "omp_destroy_nest_lock";
2998 if ( lck->lk.initialized != lck ) {
2999 KMP_FATAL( LockIsUninitialized, func );
3000 }
3001 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3002 KMP_FATAL( LockSimpleUsedAsNestable, func );
3003 }
3004 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3005 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003006 }
3007 __kmp_destroy_nested_drdpa_lock( lck );
3008}
3009
3010
3011//
3012// access functions to fields which don't exist for all lock kinds.
3013//
3014
3015static int
3016__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3017{
3018 return lck == lck->lk.initialized;
3019}
3020
3021static const ident_t *
3022__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3023{
3024 return lck->lk.location;
3025}
3026
3027static void
3028__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3029{
3030 lck->lk.location = loc;
3031}
3032
3033static kmp_lock_flags_t
3034__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3035{
3036 return lck->lk.flags;
3037}
3038
3039static void
3040__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3041{
3042 lck->lk.flags = flags;
3043}
3044
Jonathan Peyton377aa402016-04-14 16:00:37 +00003045// Time stamp counter
3046#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3047# define __kmp_tsc() __kmp_hardware_timestamp()
3048// Runtime's default backoff parameters
3049kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3050#else
3051// Use nanoseconds for other platforms
3052extern kmp_uint64 __kmp_now_nsec();
3053kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3054# define __kmp_tsc() __kmp_now_nsec()
3055#endif
3056
3057// A useful predicate for dealing with timestamps that may wrap.
3058// Is a before b?
3059// Since the timestamps may wrap, this is asking whether it's
3060// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3061// Times for which going clockwise is the shorter distance are in the future;
3062// all others are in the past.
3063// e.g. a = MAX-1, b = MAX+1 (== 0): a > b as unsigned does not mean a has reached b,
3064// whereas signed(a) = -2, signed(b) = 0 captures the actual (small) difference.
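// With those values the expression below evaluates to (kmp_int64)0 - (kmp_int64)(-2) == 2 > 0,
// so before(MAX-1, 0) is true even though MAX-1 > 0 as unsigned (on the two's-complement
// targets the runtime supports).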
3065static inline bool before(kmp_uint64 a, kmp_uint64 b)
3066{
3067 return ((kmp_int64)b - (kmp_int64)a) > 0;
3068}
3069
3070// Truncated binary exponential backoff function
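// Each call spins for boff->step inner delays of roughly boff->min_tick ticks each, then
// advances the step as step = (step << 1 | 1) masked by (max_backoff - 1); starting from an
// initial step of 1 the successive step counts are 1, 3, 7, 15, ..., capped at
// max_backoff - 1 (the mask assumes max_backoff is a power of two).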
3071void
3072__kmp_spin_backoff(kmp_backoff_t *boff)
3073{
3074    // We could flatten this loop, but making it a nested loop gives better results.
3075 kmp_uint32 i;
3076 for (i = boff->step; i > 0; i--) {
3077 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3078 do {
3079 KMP_CPU_PAUSE();
3080 } while (before(__kmp_tsc(), goal));
3081 }
3082 boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3083}
3084
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003085#if KMP_USE_DYNAMIC_LOCK
3086
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003087// Direct lock initializer. It simply writes a tag to the low 8 bits of the lock word.
Jonathan Peytona03533d2015-12-11 21:49:08 +00003088static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3089{
3090 TCW_4(*lck, KMP_GET_D_TAG(seq));
3091 KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003092}
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003093
Jonathan Peytondae13d82015-12-11 21:57:06 +00003094#if KMP_USE_TSX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003095
3096// HLE lock functions - imported from the testbed runtime.
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003097#define HLE_ACQUIRE ".byte 0xf2;"
3098#define HLE_RELEASE ".byte 0xf3;"
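// 0xf2 / 0xf3 are the XACQUIRE / XRELEASE (REPNE / REPE) instruction prefixes; processors
// without HLE simply ignore them, so these routines degrade to a plain test-and-set lock.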
3099
3100static inline kmp_uint32
3101swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3102{
3103 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3104 : "+r"(v), "+m"(*p)
3105 :
3106 : "memory");
3107 return v;
3108}
3109
3110static void
3111__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3112{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003113 TCW_4(*lck, 0);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003114}
3115
3116static void
3117__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3118{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003119 // Use gtid for KMP_LOCK_BUSY if necessary
3120 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003121 int delay = 1;
3122 do {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003123 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003124 for (int i = delay; i != 0; --i)
Jonathan Peytona03533d2015-12-11 21:49:08 +00003125 KMP_CPU_PAUSE();
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003126 delay = ((delay << 1) | 1) & 7;
3127 }
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003128 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003129 }
3130}
3131
3132static void
3133__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3134{
3135 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3136}
3137
Jonathan Peytona03533d2015-12-11 21:49:08 +00003138static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003139__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3140{
3141 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3142 : "=m"(*lck)
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003143 : "r"(KMP_LOCK_FREE(hle))
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003144 : "memory");
Jonathan Peytona03533d2015-12-11 21:49:08 +00003145 return KMP_LOCK_RELEASED;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003146}
3147
Jonathan Peytona03533d2015-12-11 21:49:08 +00003148static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003149__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3150{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003151 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003152}
3153
3154static int
3155__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3156{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003157 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003158}
3159
3160static int
3161__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3162{
3163 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3164}
3165
Jonathan Peytondae13d82015-12-11 21:57:06 +00003166static void
3167__kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3168{
3169 __kmp_init_queuing_lock(lck);
3170}
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003171
Jonathan Peytondae13d82015-12-11 21:57:06 +00003172static void
3173__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3174{
3175 __kmp_destroy_queuing_lock(lck);
3176}
3177
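// Speculative acquire: retry the transaction a few times; if the fall-back lock is already
// held, abort explicitly (code 0xff) and spin until it is released before retrying, since
// eliding a held lock cannot succeed. If the abort status says a retry is pointless, give
// up and take the queuing lock for real.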
3178static void
3179__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180{
3181 unsigned retries=3, status;
3182 do {
3183 status = _xbegin();
3184 if (status == _XBEGIN_STARTED) {
3185 if (__kmp_is_unlocked_queuing_lock(lck))
3186 return;
3187 _xabort(0xff);
3188 }
3189 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3190 // Wait until lock becomes free
3191 while (! __kmp_is_unlocked_queuing_lock(lck))
3192 __kmp_yield(TRUE);
3193 }
3194 else if (!(status & _XABORT_RETRY))
3195 break;
3196 } while (retries--);
3197
3198    // Fall back to the non-speculative queuing lock
3199 __kmp_acquire_queuing_lock(lck, gtid);
3200}
3201
3202static void
3203__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3204{
3205 __kmp_acquire_rtm_lock(lck, gtid);
3206}
3207
3208static int
3209__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3210{
3211 if (__kmp_is_unlocked_queuing_lock(lck)) {
3212 // Releasing from speculation
3213 _xend();
3214 }
3215 else {
3216 // Releasing from a real lock
3217 __kmp_release_queuing_lock(lck, gtid);
3218 }
3219 return KMP_LOCK_RELEASED;
3220}
3221
3222static int
3223__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3224{
3225 return __kmp_release_rtm_lock(lck, gtid);
3226}
3227
3228static int
3229__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3230{
3231 unsigned retries=3, status;
3232 do {
3233 status = _xbegin();
3234 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3235 return 1;
3236 }
3237 if (!(status & _XABORT_RETRY))
3238 break;
3239 } while (retries--);
3240
3241 return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3242}
3243
3244static int
3245__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3246{
3247 return __kmp_test_rtm_lock(lck, gtid);
3248}
3249
3250#endif // KMP_USE_TSX
3251
3252// Entry functions for indirect locks (first element of direct lock jump tables).
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003253static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3254static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3255static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003256static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003257static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3258static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003259static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003260static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3261
3262//
3263// Jump tables for the direct lock functions (entry 0 dispatches to the indirect lock
3264// entry functions). Only the odd entries are filled in, which avoids shifting out the low bit.
3265//
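// (A direct lock word carries an odd tag -- the low bit is set by KMP_GET_D_TAG() -- while
// an indirect lock word stores an even lock-table index, so even slots other than entry 0
// are never dispatched to.)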
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003266
Jonathan Peytona03533d2015-12-11 21:49:08 +00003267// init functions
3268#define expand(l, op) 0,__kmp_init_direct_lock,
3269void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3270 = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3271#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003272
Jonathan Peytona03533d2015-12-11 21:49:08 +00003273// destroy functions
3274#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3275void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3276 = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3277#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003278
Jonathan Peytona03533d2015-12-11 21:49:08 +00003279// set/acquire functions
3280#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3281static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3282 = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3283#undef expand
3284#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3285static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3286 = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3287#undef expand
3288
3289// unset/release and test functions
3290#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3291static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3292 = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3293static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3294 = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3295#undef expand
3296#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3297static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3298 = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3299static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3300 = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3301#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003302
3303// Exposes only one set of jump tables (*lock or *lock_with_checks).
Jonathan Peytona03533d2015-12-11 21:49:08 +00003304void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3305int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3306int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003307
3308//
3309// Jump tables for the indirect lock functions.
3310//
Jonathan Peytona03533d2015-12-11 21:49:08 +00003311#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3312void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3313void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3314#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003315
Jonathan Peytona03533d2015-12-11 21:49:08 +00003316// set/acquire functions
3317#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3318static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3319#undef expand
3320#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3321static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3322#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003323
Jonathan Peytona03533d2015-12-11 21:49:08 +00003324// unset/release and test functions
3325#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3326static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3327static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3328#undef expand
3329#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3330static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3331static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3332#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003333
Jonathan Peytona03533d2015-12-11 21:49:08 +00003334// Exposes only one set of jump tables (*lock or *lock_with_checks).
3335void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3336int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3337int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003338
3339// Lock index table.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003340kmp_indirect_lock_table_t __kmp_i_lock_table;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003341
3342// Size of indirect locks.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003343static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003344
3345// Jump tables for lock accessor/modifier.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003346void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3347void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3348const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3349kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003350
3351// Use different lock pools for different lock types.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003352static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003353
Jonathan Peytondae13d82015-12-11 21:57:06 +00003354// User lock allocator for dynamically dispatched indirect locks.
3355// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
3356// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3357// object is returned to the reusable pool of locks, unique to each lock type.
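// The caller's lock word encodes the result: when omp_lock_t is smaller than a pointer it
// holds (table index << 1), otherwise it holds the kmp_indirect_lock_t pointer itself (see
// the end of the function below).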
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003358kmp_indirect_lock_t *
3359__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3360{
3361 kmp_indirect_lock_t *lck;
3362 kmp_lock_index_t idx;
3363
3364 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3365
3366 if (__kmp_indirect_lock_pool[tag] != NULL) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003367 // Reuse the allocated and destroyed lock object
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003368 lck = __kmp_indirect_lock_pool[tag];
3369 if (OMP_LOCK_T_SIZE < sizeof(void *))
3370 idx = lck->lock->pool.index;
3371 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
Jonathan Peytondae13d82015-12-11 21:57:06 +00003372 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003373 } else {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003374 idx = __kmp_i_lock_table.next;
3375 // Check capacity and double the size if it is full
3376 if (idx == __kmp_i_lock_table.size) {
3377 // Double up the space for block pointers
3378 int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3379 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3380 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3381 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3382 __kmp_free(old_table);
3383 // Allocate new objects in the new blocks
3384 for (int i = row; i < 2*row; ++i)
3385 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3386 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3387 __kmp_i_lock_table.size = 2*idx;
3388 }
3389 __kmp_i_lock_table.next++;
3390 lck = KMP_GET_I_LOCK(idx);
3391 // Allocate a new base lock object
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003392 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003393 KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003394 }
3395
3396 __kmp_release_lock(&__kmp_global_lock, gtid);
3397
3398 lck->type = tag;
3399
3400 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3401 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3402 } else {
3403 *((kmp_indirect_lock_t **)user_lock) = lck;
3404 }
3405
3406 return lck;
3407}
3408
3409// User lock lookup for dynamically dispatched locks.
3410static __forceinline
3411kmp_indirect_lock_t *
3412__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3413{
3414 if (__kmp_env_consistency_check) {
3415 kmp_indirect_lock_t *lck = NULL;
3416 if (user_lock == NULL) {
3417 KMP_FATAL(LockIsUninitialized, func);
3418 }
3419 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003420 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
Jonathan Peytonb87b5812015-12-11 22:04:05 +00003421 if (idx >= __kmp_i_lock_table.size) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003422 KMP_FATAL(LockIsUninitialized, func);
3423 }
Jonathan Peytondae13d82015-12-11 21:57:06 +00003424 lck = KMP_GET_I_LOCK(idx);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003425 } else {
3426 lck = *((kmp_indirect_lock_t **)user_lock);
3427 }
3428 if (lck == NULL) {
3429 KMP_FATAL(LockIsUninitialized, func);
3430 }
Jonathan Peyton61118492016-05-20 19:03:38 +00003431 return lck;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003432 } else {
3433 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytondae13d82015-12-11 21:57:06 +00003434 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003435 } else {
3436 return *((kmp_indirect_lock_t **)user_lock);
3437 }
3438 }
3439}
3440
3441static void
3442__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3443{
3444#if KMP_USE_ADAPTIVE_LOCKS
3445 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3446 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3447 seq = lockseq_queuing;
3448 }
3449#endif
Jonathan Peytondae13d82015-12-11 21:57:06 +00003450#if KMP_USE_TSX
3451 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3452 seq = lockseq_queuing;
3453 }
3454#endif
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003455 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003456 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003457 KMP_I_LOCK_FUNC(l, init)(l->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003458 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003459}
3460
3461static void
3462__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3463{
3464 kmp_uint32 gtid = __kmp_entry_gtid();
3465 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003466 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003467 kmp_indirect_locktag_t tag = l->type;
3468
3469 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3470
3471 // Use the base lock's space to keep the pool chain.
3472 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3473 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003474 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003475 }
3476 __kmp_indirect_lock_pool[tag] = l;
3477
3478 __kmp_release_lock(&__kmp_global_lock, gtid);
3479}
3480
3481static void
3482__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3483{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003484 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3485 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003486}
3487
Jonathan Peytona03533d2015-12-11 21:49:08 +00003488static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003489__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3490{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003491 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
Jonathan Peytona03533d2015-12-11 21:49:08 +00003492 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003493}
3494
3495static int
3496__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3497{
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003498 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3499 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003500}
3501
3502static void
3503__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3504{
3505 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003506 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003507}
3508
Jonathan Peytona03533d2015-12-11 21:49:08 +00003509static int
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003510__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3511{
3512 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
Jonathan Peytona03533d2015-12-11 21:49:08 +00003513 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003514}
3515
3516static int
3517__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3518{
3519 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003520 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003521}
3522
3523kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3524
Jonathan Peytonde4749b2016-12-14 23:01:24 +00003525// This is used only in kmp_error.cpp when consistency checking is on.
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003526kmp_int32
3527__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3528{
3529 switch (seq) {
3530 case lockseq_tas:
3531 case lockseq_nested_tas:
3532 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003533#if KMP_USE_FUTEX
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003534 case lockseq_futex:
3535 case lockseq_nested_futex:
3536 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3537#endif
3538 case lockseq_ticket:
3539 case lockseq_nested_ticket:
3540 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3541 case lockseq_queuing:
3542 case lockseq_nested_queuing:
3543#if KMP_USE_ADAPTIVE_LOCKS
3544 case lockseq_adaptive:
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003545#endif
Jonathan Peyton1d46d972016-03-30 21:50:59 +00003546 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003547 case lockseq_drdpa:
3548 case lockseq_nested_drdpa:
3549 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3550 default:
3551 return 0;
3552 }
3553}
3554
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003555// Initializes data for dynamic user locks.
3556void
3557__kmp_init_dynamic_user_locks()
3558{
Jonathan Peytona03533d2015-12-11 21:49:08 +00003559    // Initialize the jump table pointers for the lock functions
3560 if (__kmp_env_consistency_check) {
3561 __kmp_direct_set = direct_set_check;
3562 __kmp_direct_unset = direct_unset_check;
3563 __kmp_direct_test = direct_test_check;
3564 __kmp_indirect_set = indirect_set_check;
3565 __kmp_indirect_unset = indirect_unset_check;
3566 __kmp_indirect_test = indirect_test_check;
3567 }
3568 else {
3569 __kmp_direct_set = direct_set;
3570 __kmp_direct_unset = direct_unset;
3571 __kmp_direct_test = direct_test;
3572 __kmp_indirect_set = indirect_set;
3573 __kmp_indirect_unset = indirect_unset;
3574 __kmp_indirect_test = indirect_test;
3575 }
3576
3577 // Initialize lock index table
Jonathan Peytondae13d82015-12-11 21:57:06 +00003578 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3579 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3580 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
Jonathan Peyton61118492016-05-20 19:03:38 +00003581 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
Jonathan Peytondae13d82015-12-11 21:57:06 +00003582 __kmp_i_lock_table.next = 0;
3583
3584 // Indirect lock size
3585 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3586 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3587#if KMP_USE_ADAPTIVE_LOCKS
3588 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3589#endif
3590 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3591#if KMP_USE_TSX
3592 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3593#endif
3594 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3595#if KMP_USE_FUTEX
3596 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3597#endif
3598 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3599 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3600 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003601
3602 // Initialize lock accessor/modifier
Jonathan Peytondae13d82015-12-11 21:57:06 +00003603#define fill_jumps(table, expand, sep) { \
3604 table[locktag##sep##ticket] = expand(ticket); \
3605 table[locktag##sep##queuing] = expand(queuing); \
3606 table[locktag##sep##drdpa] = expand(drdpa); \
3607}
3608
3609#if KMP_USE_ADAPTIVE_LOCKS
3610# define fill_table(table, expand) { \
3611 fill_jumps(table, expand, _); \
3612 table[locktag_adaptive] = expand(queuing); \
3613 fill_jumps(table, expand, _nested_); \
3614}
3615#else
3616# define fill_table(table, expand) { \
3617 fill_jumps(table, expand, _); \
3618 fill_jumps(table, expand, _nested_); \
3619}
3620#endif // KMP_USE_ADAPTIVE_LOCKS
3621
3622#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3623 fill_table(__kmp_indirect_set_location, expand);
3624#undef expand
3625#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3626 fill_table(__kmp_indirect_set_flags, expand);
3627#undef expand
3628#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3629 fill_table(__kmp_indirect_get_location, expand);
3630#undef expand
3631#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3632 fill_table(__kmp_indirect_get_flags, expand);
3633#undef expand
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003634
3635 __kmp_init_user_locks = TRUE;
3636}
3637
3638// Clean up the lock table.
3639void
3640__kmp_cleanup_indirect_user_locks()
3641{
3642 kmp_lock_index_t i;
3643 int k;
3644
3645 // Clean up locks in the pools first (they were already destroyed before going into the pools).
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003646 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003647 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3648 while (l != NULL) {
3649 kmp_indirect_lock_t *ll = l;
3650 l = (kmp_indirect_lock_t *)l->lock->pool.next;
Jonathan Peytondae13d82015-12-11 21:57:06 +00003651 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003652 __kmp_free(ll->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003653 ll->lock = NULL;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003654 }
Jonathan Peyton3bd88d42016-01-22 19:16:14 +00003655 __kmp_indirect_lock_pool[k] = NULL;
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003656 }
3657 // Clean up the remaining undestroyed locks.
Jonathan Peytondae13d82015-12-11 21:57:06 +00003658 for (i = 0; i < __kmp_i_lock_table.next; i++) {
3659 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3660 if (l->lock != NULL) {
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003661 // Locks not destroyed explicitly need to be destroyed here.
Jonathan Peytonf2d119f2015-12-03 19:37:20 +00003662 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
Jonathan Peytondae13d82015-12-11 21:57:06 +00003663 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003664 __kmp_free(l->lock);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003665 }
3666 }
3667 // Free the table
Jonathan Peytondae13d82015-12-11 21:57:06 +00003668 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3669 __kmp_free(__kmp_i_lock_table.table[i]);
3670 __kmp_free(__kmp_i_lock_table.table);
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003671
3672 __kmp_init_user_locks = FALSE;
3673}
3674
3675enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3676int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3677
3678#else // KMP_USE_DYNAMIC_LOCK
3679
Jim Cownie5e8470a2013-09-27 10:38:44 +00003680/* ------------------------------------------------------------------------ */
3681/* user locks
3682 *
3683 * They are implemented as a table of function pointers which are set to the
3684 * lock functions of the appropriate kind, once that has been determined.
3685 */
3686
3687enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3688
3689size_t __kmp_base_user_lock_size = 0;
3690size_t __kmp_user_lock_size = 0;
3691
3692kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003693int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003694
3695int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003696int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003697void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3698void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3699void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
Jonathan Peyton0e6d4572015-10-16 16:52:58 +00003700int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003701
3702int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003703int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003704void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3705void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3706
3707int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3708const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3709void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3710kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3711void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3712
3713void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3714{
3715 switch ( user_lock_kind ) {
3716 case lk_default:
3717 default:
3718 KMP_ASSERT( 0 );
3719
3720 case lk_tas: {
3721 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3722 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3723
3724 __kmp_get_user_lock_owner_ =
3725 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3726 ( &__kmp_get_tas_lock_owner );
3727
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003728 if ( __kmp_env_consistency_check ) {
3729 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3730 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3731 }
3732 else {
3733 KMP_BIND_USER_LOCK(tas);
3734 KMP_BIND_NESTED_USER_LOCK(tas);
3735 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003736
3737 __kmp_destroy_user_lock_ =
3738 ( void ( * )( kmp_user_lock_p ) )
3739 ( &__kmp_destroy_tas_lock );
3740
Jim Cownie5e8470a2013-09-27 10:38:44 +00003741 __kmp_is_user_lock_initialized_ =
3742 ( int ( * )( kmp_user_lock_p ) ) NULL;
3743
3744 __kmp_get_user_lock_location_ =
3745 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3746
3747 __kmp_set_user_lock_location_ =
3748 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3749
3750 __kmp_get_user_lock_flags_ =
3751 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3752
3753 __kmp_set_user_lock_flags_ =
3754 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3755 }
3756 break;
3757
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003758#if KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00003759
3760 case lk_futex: {
3761 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3762 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3763
3764 __kmp_get_user_lock_owner_ =
3765 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3766 ( &__kmp_get_futex_lock_owner );
3767
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003768 if ( __kmp_env_consistency_check ) {
3769 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3770 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3771 }
3772 else {
3773 KMP_BIND_USER_LOCK(futex);
3774 KMP_BIND_NESTED_USER_LOCK(futex);
3775 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003776
3777 __kmp_destroy_user_lock_ =
3778 ( void ( * )( kmp_user_lock_p ) )
3779 ( &__kmp_destroy_futex_lock );
3780
Jim Cownie5e8470a2013-09-27 10:38:44 +00003781 __kmp_is_user_lock_initialized_ =
3782 ( int ( * )( kmp_user_lock_p ) ) NULL;
3783
3784 __kmp_get_user_lock_location_ =
3785 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3786
3787 __kmp_set_user_lock_location_ =
3788 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3789
3790 __kmp_get_user_lock_flags_ =
3791 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3792
3793 __kmp_set_user_lock_flags_ =
3794 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3795 }
3796 break;
3797
Paul Osmialowskifb043fd2016-05-16 09:44:11 +00003798#endif // KMP_USE_FUTEX
Jim Cownie5e8470a2013-09-27 10:38:44 +00003799
3800 case lk_ticket: {
3801 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3802 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3803
3804 __kmp_get_user_lock_owner_ =
3805 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3806 ( &__kmp_get_ticket_lock_owner );
3807
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003808 if ( __kmp_env_consistency_check ) {
3809 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3810 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3811 }
3812 else {
3813 KMP_BIND_USER_LOCK(ticket);
3814 KMP_BIND_NESTED_USER_LOCK(ticket);
3815 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003816
3817 __kmp_destroy_user_lock_ =
3818 ( void ( * )( kmp_user_lock_p ) )
3819 ( &__kmp_destroy_ticket_lock );
3820
Jim Cownie5e8470a2013-09-27 10:38:44 +00003821 __kmp_is_user_lock_initialized_ =
3822 ( int ( * )( kmp_user_lock_p ) )
3823 ( &__kmp_is_ticket_lock_initialized );
3824
3825 __kmp_get_user_lock_location_ =
3826 ( const ident_t * ( * )( kmp_user_lock_p ) )
3827 ( &__kmp_get_ticket_lock_location );
3828
3829 __kmp_set_user_lock_location_ =
3830 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3831 ( &__kmp_set_ticket_lock_location );
3832
3833 __kmp_get_user_lock_flags_ =
3834 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3835 ( &__kmp_get_ticket_lock_flags );
3836
3837 __kmp_set_user_lock_flags_ =
3838 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3839 ( &__kmp_set_ticket_lock_flags );
3840 }
3841 break;
3842
3843 case lk_queuing: {
3844 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3845 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3846
3847 __kmp_get_user_lock_owner_ =
3848 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3849 ( &__kmp_get_queuing_lock_owner );
3850
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003851 if ( __kmp_env_consistency_check ) {
3852 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3853 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3854 }
3855 else {
3856 KMP_BIND_USER_LOCK(queuing);
3857 KMP_BIND_NESTED_USER_LOCK(queuing);
3858 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003859
3860 __kmp_destroy_user_lock_ =
3861 ( void ( * )( kmp_user_lock_p ) )
3862 ( &__kmp_destroy_queuing_lock );
3863
Jim Cownie5e8470a2013-09-27 10:38:44 +00003864 __kmp_is_user_lock_initialized_ =
3865 ( int ( * )( kmp_user_lock_p ) )
3866 ( &__kmp_is_queuing_lock_initialized );
3867
3868 __kmp_get_user_lock_location_ =
3869 ( const ident_t * ( * )( kmp_user_lock_p ) )
3870 ( &__kmp_get_queuing_lock_location );
3871
3872 __kmp_set_user_lock_location_ =
3873 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3874 ( &__kmp_set_queuing_lock_location );
3875
3876 __kmp_get_user_lock_flags_ =
3877 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3878 ( &__kmp_get_queuing_lock_flags );
3879
3880 __kmp_set_user_lock_flags_ =
3881 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3882 ( &__kmp_set_queuing_lock_flags );
3883 }
3884 break;
3885
3886#if KMP_USE_ADAPTIVE_LOCKS
3887 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003888 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3889 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003890
3891 __kmp_get_user_lock_owner_ =
3892 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3893 ( &__kmp_get_queuing_lock_owner );
3894
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003895 if ( __kmp_env_consistency_check ) {
3896 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3897 }
3898 else {
3899 KMP_BIND_USER_LOCK(adaptive);
3900 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003901
3902 __kmp_destroy_user_lock_ =
3903 ( void ( * )( kmp_user_lock_p ) )
3904 ( &__kmp_destroy_adaptive_lock );
3905
3906 __kmp_is_user_lock_initialized_ =
3907 ( int ( * )( kmp_user_lock_p ) )
3908 ( &__kmp_is_queuing_lock_initialized );
3909
3910 __kmp_get_user_lock_location_ =
3911 ( const ident_t * ( * )( kmp_user_lock_p ) )
3912 ( &__kmp_get_queuing_lock_location );
3913
3914 __kmp_set_user_lock_location_ =
3915 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3916 ( &__kmp_set_queuing_lock_location );
3917
3918 __kmp_get_user_lock_flags_ =
3919 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3920 ( &__kmp_get_queuing_lock_flags );
3921
3922 __kmp_set_user_lock_flags_ =
3923 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3924 ( &__kmp_set_queuing_lock_flags );
3925
3926 }
3927 break;
3928#endif // KMP_USE_ADAPTIVE_LOCKS
3929
3930 case lk_drdpa: {
3931 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3932 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3933
3934 __kmp_get_user_lock_owner_ =
3935 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3936 ( &__kmp_get_drdpa_lock_owner );
3937
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003938 if ( __kmp_env_consistency_check ) {
3939 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3940 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3941 }
3942 else {
3943 KMP_BIND_USER_LOCK(drdpa);
3944 KMP_BIND_NESTED_USER_LOCK(drdpa);
3945 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003946
3947 __kmp_destroy_user_lock_ =
3948 ( void ( * )( kmp_user_lock_p ) )
3949 ( &__kmp_destroy_drdpa_lock );
3950
Jim Cownie5e8470a2013-09-27 10:38:44 +00003951 __kmp_is_user_lock_initialized_ =
3952 ( int ( * )( kmp_user_lock_p ) )
3953 ( &__kmp_is_drdpa_lock_initialized );
3954
3955 __kmp_get_user_lock_location_ =
3956 ( const ident_t * ( * )( kmp_user_lock_p ) )
3957 ( &__kmp_get_drdpa_lock_location );
3958
3959 __kmp_set_user_lock_location_ =
3960 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3961 ( &__kmp_set_drdpa_lock_location );
3962
3963 __kmp_get_user_lock_flags_ =
3964 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3965 ( &__kmp_get_drdpa_lock_flags );
3966
3967 __kmp_set_user_lock_flags_ =
3968 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3969 ( &__kmp_set_drdpa_lock_flags );
3970 }
3971 break;
3972 }
3973}
3974
3975
3976// ----------------------------------------------------------------------------
3977// User lock table & lock allocation
3978
3979kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3980kmp_user_lock_p __kmp_lock_pool = NULL;
3981
3982// Lock block-allocation support.
3983kmp_block_of_locks* __kmp_lock_blocks = NULL;
3984int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3985
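// Appends a lock to the global user lock table, doubling the table when it is full.
// Slot [0] of each table is reserved for a pointer to the previous (smaller) table,
// which is why the indices handed out here start at 1.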
3986static kmp_lock_index_t
3987__kmp_lock_table_insert( kmp_user_lock_p lck )
3988{
3989 // Assume that kmp_global_lock is held upon entry/exit.
3990 kmp_lock_index_t index;
3991 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3992 kmp_lock_index_t size;
3993 kmp_user_lock_p *table;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003994 // Reallocate lock table.
3995 if ( __kmp_user_lock_table.allocated == 0 ) {
3996 size = 1024;
3997 }
3998 else {
3999 size = __kmp_user_lock_table.allocated * 2;
4000 }
4001 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00004002 KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004003 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
Jonathan Peyton1406f012015-05-22 22:35:51 +00004004 // We cannot free the previous table now, since it may be in use by other
Jim Cownie5e8470a2013-09-27 10:38:44 +00004005        // threads. So save the pointer to the previous table in the first element of the
4006        // new table. All the tables are organized into a list and can be freed when the
4007        // library is shutting down.
4008 __kmp_user_lock_table.table = table;
4009 __kmp_user_lock_table.allocated = size;
4010 }
4011 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
4012 index = __kmp_user_lock_table.used;
4013 __kmp_user_lock_table.table[ index ] = lck;
4014 ++ __kmp_user_lock_table.used;
4015 return index;
4016}
4017
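// Carves lock objects out of larger blocks: each block is a single __kmp_allocate'd buffer
// holding __kmp_num_locks_in_block locks followed by the kmp_block_of_locks header, and
// last_index tracks the next unused slot in the current block.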
4018static kmp_user_lock_p
4019__kmp_lock_block_allocate()
4020{
4021 // Assume that kmp_global_lock is held upon entry/exit.
4022 static int last_index = 0;
4023 if ( ( last_index >= __kmp_num_locks_in_block )
4024 || ( __kmp_lock_blocks == NULL ) ) {
4025 // Restart the index.
4026 last_index = 0;
4027 // Need to allocate a new block.
4028 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4029 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4030 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4031 // Set up the new block.
4032 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4033 new_block->next_block = __kmp_lock_blocks;
4034 new_block->locks = (void *)buffer;
4035 // Publish the new block.
4036 KMP_MB();
4037 __kmp_lock_blocks = new_block;
4038 }
4039 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4040 [ last_index * __kmp_user_lock_size ] ) );
4041 last_index++;
4042 return ret;
4043}
4044
4045//
4046// Get memory for a lock. It may be freshly allocated memory or reused memory
4047// from lock pool.
4048//
4049kmp_user_lock_p
4050__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4051 kmp_lock_flags_t flags )
4052{
4053 kmp_user_lock_p lck;
4054 kmp_lock_index_t index;
4055 KMP_DEBUG_ASSERT( user_lock );
4056
4057 __kmp_acquire_lock( &__kmp_global_lock, gtid );
4058
4059 if ( __kmp_lock_pool == NULL ) {
4060 // Lock pool is empty. Allocate new memory.
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00004061
4062 // ANNOTATION: Found no good way to express the syncronisation
4063 // between allocation and usage, so ignore the allocation
4064 ANNOTATE_IGNORE_WRITES_BEGIN();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004065 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4066 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4067 }
4068 else {
4069 lck = __kmp_lock_block_allocate();
4070 }
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00004071 ANNOTATE_IGNORE_WRITES_END();
Jim Cownie5e8470a2013-09-27 10:38:44 +00004072
4073 // Insert lock in the table so that it can be freed in __kmp_cleanup,
4074 // and debugger has info on all allocated locks.
4075 index = __kmp_lock_table_insert( lck );
4076 }
4077 else {
4078 // Pick up lock from pool.
4079 lck = __kmp_lock_pool;
4080 index = __kmp_lock_pool->pool.index;
4081 __kmp_lock_pool = __kmp_lock_pool->pool.next;
4082 }
4083
4084 //
4085 // We could potentially differentiate between nested and regular locks
4086 // here, and do the lock table lookup for regular locks only.
4087 //
4088 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4089 * ( (kmp_lock_index_t *) user_lock ) = index;
4090 }
4091 else {
4092 * ( (kmp_user_lock_p *) user_lock ) = lck;
4093 }
4094
4095 // mark the lock if it is critical section lock.
4096 __kmp_set_user_lock_flags( lck, flags );
4097
4098    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
4099
4100 return lck;
4101}
4102
4103// Return the lock's memory to the pool for reuse.
4104void
4105__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4106{
Jim Cownie5e8470a2013-09-27 10:38:44 +00004107 KMP_DEBUG_ASSERT( user_lock != NULL );
4108 KMP_DEBUG_ASSERT( lck != NULL );
4109
4110 __kmp_acquire_lock( & __kmp_global_lock, gtid );
4111
4112 lck->pool.next = __kmp_lock_pool;
4113 __kmp_lock_pool = lck;
4114 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4115 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4116 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4117 lck->pool.index = index;
4118 }
4119
4120 __kmp_release_lock( & __kmp_global_lock, gtid );
4121}
4122
4123kmp_user_lock_p
4124__kmp_lookup_user_lock( void **user_lock, char const *func )
4125{
4126 kmp_user_lock_p lck = NULL;
4127
4128 if ( __kmp_env_consistency_check ) {
4129 if ( user_lock == NULL ) {
4130 KMP_FATAL( LockIsUninitialized, func );
4131 }
4132 }
4133
4134 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4135 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4136 if ( __kmp_env_consistency_check ) {
4137 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4138 KMP_FATAL( LockIsUninitialized, func );
4139 }
4140 }
4141 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4142 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4143 lck = __kmp_user_lock_table.table[index];
4144 }
4145 else {
4146 lck = *( (kmp_user_lock_p *)user_lock );
4147 }
4148
4149 if ( __kmp_env_consistency_check ) {
4150 if ( lck == NULL ) {
4151 KMP_FATAL( LockIsUninitialized, func );
4152 }
4153 }
4154
4155 return lck;
4156}
4157
4158void
4159__kmp_cleanup_user_locks( void )
4160{
4161 //
4162 // Reset lock pool. Do not worry about lock in the pool -- we will free
4163 // them when iterating through lock table (it includes all the locks,
4164 // dead or alive).
4165 //
4166 __kmp_lock_pool = NULL;
4167
4168#define IS_CRITICAL(lck) \
4169 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4170 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4171
4172 //
4173 // Loop through lock table, free all locks.
4174 //
4175 // Do not free item [0], it is reserved for lock tables list.
4176 //
4177 // FIXME - we are iterating through a list of (pointers to) objects of
4178 // type union kmp_user_lock, but we have no way of knowing whether the
4179 // base type is currently "pool" or whatever the global user lock type
4180 // is.
4181 //
4182 // We are relying on the fact that for all of the user lock types
4183 // (except "tas"), the first field in the lock struct is the "initialized"
4184 // field, which is set to the address of the lock object itself when
4185 // the lock is initialized. When the union is of type "pool", the
4186 // first field is a pointer to the next object in the free list, which
4187 // will not be the same address as the object itself.
4188 //
4189 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4190 // will fail for "pool" objects on the free list. This must happen as
4191 // the "location" field of real user locks overlaps the "index" field
4192 // of "pool" objects.
4193 //
4194 // It would be better to run through the free list, and remove all "pool"
4195 // objects from the lock table before executing this loop. However,
4196 // "pool" objects do not always have their index field set (only on
4197 // lin_32e), and I don't want to search the lock table for the address
4198 // of every "pool" object on the free list.
4199 //
4200 while ( __kmp_user_lock_table.used > 1 ) {
4201 const ident *loc;
4202
4203 //
4204 // reduce __kmp_user_lock_table.used before freeing the lock,
4205 // so that state of locks is consistent
4206 //
4207 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4208 --__kmp_user_lock_table.used ];
4209
4210 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4211 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4212 //
4213 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4214 // initialized AND it is NOT a critical section (user is not
4215 // responsible for destroying criticals) AND we know source
4216 // location to report.
4217 //
4218 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4219 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4220 ( loc->psource != NULL ) ) {
4221 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
Andrey Churbanove8595de2015-02-20 18:19:41 +00004222 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004223 __kmp_str_loc_free( &str_loc);
4224 }
4225
4226#ifdef KMP_DEBUG
4227 if ( IS_CRITICAL( lck ) ) {
4228 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4229 }
4230 else {
4231 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4232 }
4233#endif // KMP_DEBUG
4234
4235 //
4236 // Cleanup internal lock dynamic resources
4237 // (for drdpa locks particularly).
4238 //
4239 __kmp_destroy_user_lock( lck );
4240 }
4241
4242 //
4243 // Free the lock if block allocation of locks is not used.
4244 //
4245 if ( __kmp_lock_blocks == NULL ) {
4246 __kmp_free( lck );
4247 }
4248 }
4249
4250#undef IS_CRITICAL
4251
4252 //
4253 // delete lock table(s).
4254 //
4255 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4256 __kmp_user_lock_table.table = NULL;
4257 __kmp_user_lock_table.allocated = 0;
4258
4259 while ( table_ptr != NULL ) {
4260 //
4261 // In the first element we saved the pointer to the previous
4262 // (smaller) lock table.
4263 //
4264 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4265 __kmp_free( table_ptr );
4266 table_ptr = next;
4267 }
4268
4269 //
4270 // Free buffers allocated for blocks of locks.
4271 //
4272 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4273 __kmp_lock_blocks = NULL;
4274
4275 while ( block_ptr != NULL ) {
4276 kmp_block_of_locks_t *next = block_ptr->next_block;
4277 __kmp_free( block_ptr->locks );
4278 //
4279 // *block_ptr itself was allocated at the end of the locks vector.
4280 //
4281 block_ptr = next;
4282 }
4283
4284 TCW_4(__kmp_init_user_locks, FALSE);
4285}
4286
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00004287#endif // KMP_USE_DYNAMIC_LOCK