/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}

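/* Illustrative example of the wrap-around property checked above: with
   x = 0xFFFFFFFD and y = 0xFFFFFFFB, incrementing both past the 2^32 boundary
   still yields x - y == 2 under modular kmp_uint32 arithmetic.  The ticket
   ("bakery") locks below rely on exactly this property when their counters
   eventually overflow. */
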

/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}

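/* For reference, the fast path above is a single compare-and-swap with acquire
   semantics on the 32-bit poll word.  A rough standalone sketch of the same idea
   in C11 atomics (illustrative only -- the KMP_* macros additionally handle the
   platform memory fences, the yield/spin heuristics, and ITT notification):

       #include <stdatomic.h>
       #include <stdint.h>

       static _Atomic uint32_t poll_word;               // 0 == free

       static void sketch_acquire( uint32_t gtid ) {
           uint32_t expected = 0;
           while ( ! atomic_compare_exchange_weak_explicit( &poll_word, &expected,
                        gtid + 1, memory_order_acquire, memory_order_relaxed ) ) {
               expected = 0;                             // retry; real code yields/backs off
           }
       }
*/
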
void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}

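/* A minimal usage sketch of the simple TAS interface above (illustrative only;
   inside the runtime these entry points are normally reached through the user
   lock dispatch tables rather than called directly):

       kmp_tas_lock_t l;
       __kmp_init_tas_lock( &l );
       __kmp_acquire_tas_lock( &l, gtid );   // poll now encodes gtid+1
       // ... critical section ...
       __kmp_release_tas_lock( &l, gtid );   // poll back to DYNA_LOCK_FREE(tas)
       __kmp_destroy_tas_lock( &l );
*/
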

//
// nested test and set locks
//

void
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

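/* Illustrative summary of the futex protocol implemented below: the 32-bit poll
   word is DYNA_LOCK_FREE(futex) when the lock is free; an uncontended owner
   stores (gtid+1) << 1, leaving the low bit clear.  A waiter that finds the lock
   held sets the low bit to mark the lock contended and then blocks in
   futex(FUTEX_WAIT) on the poll word; the releasing thread swaps the free value
   back in and, if the value it swapped out had the low bit set, issues
   futex(FUTEX_WAKE) to wake a waiter, which then retries the acquire loop. */
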
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static void
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
      DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {

        kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: do not write the condition for this branch as
        //
        //    if ( poll_val & 1 == 0 )
        //
        // because "==" binds tighter than "&", so that expression parses as
        // ( poll_val & ( 1 == 0 ) ) and the block is always skipped, regardless
        // of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= DYNA_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
}

void
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

void
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

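/* Sketch of the ticket (bakery) algorithm implemented below (illustrative
   pseudo-code only):

       my_ticket = fetch_and_inc( &lk.next_ticket );    // take a ticket
       while ( lk.now_serving != my_ticket )
           ;                                            // spin/yield, see __kmp_bakery_check
       // ... critical section ...
       lk.now_serving += 1;                             // release: admit next ticket holder

   Tickets are granted in FIFO order, and modular 32-bit arithmetic keeps the
   scheme correct across counter wrap-around (see __kmp_validate_locks). */
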
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

static kmp_uint32
__kmp_bakery_check(kmp_uint value, kmp_uint checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}

__forceinline static void
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
}

void
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32  distance;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

void
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =  0, 0  means lock is unheld, nobody on queue
 *                UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                h, h  means lock is held or about to transition, 1 element on queue
 *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */

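/* Illustrative note on the representation used below: head_id and tail_id hold
   gtid+1 values (0 = lock free, -1 in head_id = held with an empty queue), the
   waiters are chained through their th.th_next_waiting fields, and each waiter
   spins on its own th.th_spin_here flag, so the releasing thread hands the lock
   off by clearing exactly one successor's flag. */
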
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
             "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
             gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
             head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
    ompt_state_t prev_state = ompt_state_undefined;
#endif

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if (ompt_enabled && prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif

        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
}

void
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}

static void
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}

static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                          lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif

#if OMPT_SUPPORT
                /* nothing to do - no other thread is trying to shift blame */
#endif

                return KMP_LOCK_RELEASED;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return KMP_LOCK_RELEASED;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_queuing_lock( lck, gtid );
}

void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

void
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_queuing_lock( lck, gtid );
}

int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}

int
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1760{
1761 KMP_DEBUG_ASSERT( gtid >= 0 );
1762
1763 KMP_MB();
1764 if ( --(lck->lk.depth_locked) == 0 ) {
1765 KMP_MB();
1766 lck->lk.owner_id = 0;
1767 __kmp_release_queuing_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001768 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001769 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001770 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001771}
1772
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001773static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001774__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1775{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001776 char const * const func = "omp_unset_nest_lock";
1777 KMP_MB(); /* in case another processor initialized lock */
1778 if ( lck->lk.initialized != lck ) {
1779 KMP_FATAL( LockIsUninitialized, func );
1780 }
1781 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1782 KMP_FATAL( LockSimpleUsedAsNestable, func );
1783 }
1784 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1785 KMP_FATAL( LockUnsettingFree, func );
1786 }
1787 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1788 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001789 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001790 return __kmp_release_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001791}
1792
1793void
1794__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1795{
1796 __kmp_init_queuing_lock( lck );
1797 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1798}
1799
1800static void
1801__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1802{
1803 __kmp_init_nested_queuing_lock( lck );
1804}
1805
1806void
1807__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1808{
1809 __kmp_destroy_queuing_lock( lck );
1810 lck->lk.depth_locked = 0;
1811}
1812
1813static void
1814__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1815{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001816 char const * const func = "omp_destroy_nest_lock";
1817 if ( lck->lk.initialized != lck ) {
1818 KMP_FATAL( LockIsUninitialized, func );
1819 }
1820 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1821 KMP_FATAL( LockSimpleUsedAsNestable, func );
1822 }
1823 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1824 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001825 }
1826 __kmp_destroy_nested_queuing_lock( lck );
1827}
1828
1829
1830//
1831// access functions to fields which don't exist for all lock kinds.
1832//
1833
1834static int
1835__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1836{
1837 return lck == lck->lk.initialized;
1838}
1839
1840static const ident_t *
1841__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1842{
1843 return lck->lk.location;
1844}
1845
1846static void
1847__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1848{
1849 lck->lk.location = loc;
1850}
1851
1852static kmp_lock_flags_t
1853__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1854{
1855 return lck->lk.flags;
1856}
1857
1858static void
1859__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1860{
1861 lck->lk.flags = flags;
1862}
1863
1864#if KMP_USE_ADAPTIVE_LOCKS
1865
1866/*
1867 RTM Adaptive locks
1868*/
1869
1870// TODO: Use the header for intrinsics below with the compiler 13.0
1871//#include <immintrin.h>
1872
1873// Values from the status register after failed speculation.
1874#define _XBEGIN_STARTED (~0u)
1875#define _XABORT_EXPLICIT (1 << 0)
1876#define _XABORT_RETRY (1 << 1)
1877#define _XABORT_CONFLICT (1 << 2)
1878#define _XABORT_CAPACITY (1 << 3)
1879#define _XABORT_DEBUG (1 << 4)
1880#define _XABORT_NESTED (1 << 5)
1881#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1882
1883// Aborts for which it's worth trying again immediately
1884#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1885
1886#define STRINGIZE_INTERNAL(arg) #arg
1887#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1888
1889// Access to RTM instructions
1890
1891/*
1892 A version of XBegin which returns -1 when speculation starts successfully, and the value of EAX on an abort.
1893 This is the same definition as the compiler intrinsic that will be supported at some point.
1894*/
1895static __inline int _xbegin()
1896{
1897 int res = -1;
1898
1899#if KMP_OS_WINDOWS
1900#if KMP_ARCH_X86_64
1901 _asm {
1902 _emit 0xC7
1903 _emit 0xF8
1904 _emit 2
1905 _emit 0
1906 _emit 0
1907 _emit 0
1908 jmp L2
1909 mov res, eax
1910 L2:
1911 }
1912#else /* IA32 */
1913 _asm {
1914 _emit 0xC7
1915 _emit 0xF8
1916 _emit 2
1917 _emit 0
1918 _emit 0
1919 _emit 0
1920 jmp L2
1921 mov res, eax
1922 L2:
1923 }
1924#endif // KMP_ARCH_X86_64
1925#else
1926 /* Note that %eax must be noted as killed (clobbered), because
1927 * the XSR is returned in %eax(%rax) on abort. Other register
1928 * values are restored, so don't need to be killed.
1929 *
1930 * We must also mark 'res' as an input and an output, since otherwise
1931 * 'res=-1' may be dropped as being dead, whereas we do need the
1932 * assignment on the successful (i.e., non-abort) path.
1933 */
1934 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1935 " .long 1f-1b-6\n"
1936 " jmp 2f\n"
1937 "1: movl %%eax,%0\n"
1938 "2:"
1939 :"+r"(res)::"memory","%eax");
1940#endif // KMP_OS_WINDOWS
1941 return res;
1942}
1943
1944/*
1945 Transaction end
1946*/
1947static __inline void _xend()
1948{
1949#if KMP_OS_WINDOWS
1950 __asm {
1951 _emit 0x0f
1952 _emit 0x01
1953 _emit 0xd5
1954 }
1955#else
1956 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1957#endif
1958}
1959
1960/*
1961 This is a macro; the argument must be a single-byte constant that the
1962 inline assembler can evaluate, since it is emitted as a byte into the
1963 assembly code.
1964*/
1965#if KMP_OS_WINDOWS
1966#define _xabort(ARG) \
1967 _asm _emit 0xc6 \
1968 _asm _emit 0xf8 \
1969 _asm _emit ARG
1970#else
1971#define _xabort(ARG) \
1972 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1973#endif
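//
// A minimal usage sketch of the wrappers above (illustrative only, kept out of the
// build with #if 0): start a transaction with _xbegin(), check that the fallback
// lock still looks free so the two paths stay mutually exclusive, then either commit
// with _xend() or fall back. The fallback_* names are hypothetical and not part of
// this file.
//
#if 0
static void
example_transactional_update( volatile int *counter )
{
    if ( (unsigned)_xbegin() == _XBEGIN_STARTED ) {
        // Speculative path: nothing becomes visible until _xend() commits.
        if ( fallback_lock_is_held() )
            _xabort(0x02)              // abort status will carry _XABORT_EXPLICIT
        (*counter)++;
        _xend();
    }
    else {
        // Aborted: _xbegin() returned the abort status from EAX; take the real lock.
        fallback_acquire();
        (*counter)++;
        fallback_release();
    }
}
#endif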
1974
1975//
1976// Statistics are collected for testing purposes
1977//
1978#if KMP_DEBUG_ADAPTIVE_LOCKS
1979
1980// We accumulate speculative lock statistics when the lock is destroyed.
1981// We keep locks that haven't been destroyed in the liveLocks list
1982// so that we can grab their statistics too.
1983static kmp_adaptive_lock_statistics_t destroyedStats;
1984
1985// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001986static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001987
1988// A lock so we can safely update the list of locks.
1989static kmp_bootstrap_lock_t chain_lock;
1990
1991// Initialize the list of stats.
1992void
1993__kmp_init_speculative_stats()
1994{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001995 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001996
1997 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1998 lck->stats.next = lck;
1999 lck->stats.prev = lck;
2000
2001 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2002 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2003
2004 __kmp_init_bootstrap_lock( &chain_lock );
2005
2006}
2007
2008// Insert the lock into the circular list
2009static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002010__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011{
2012 __kmp_acquire_bootstrap_lock( &chain_lock );
2013
2014 lck->stats.next = liveLocks.stats.next;
2015 lck->stats.prev = &liveLocks;
2016
2017 liveLocks.stats.next = lck;
2018 lck->stats.next->stats.prev = lck;
2019
2020 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2021 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2022
2023 __kmp_release_bootstrap_lock( &chain_lock );
2024}
2025
2026static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002027__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002028{
2029 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2030 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2031
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002032 kmp_adaptive_lock_info_t * n = lck->stats.next;
2033 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002034
2035 n->stats.prev = p;
2036 p->stats.next = n;
2037}
2038
2039static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002041{
2042 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2043 __kmp_remember_lock( lck );
2044}
2045
2046static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002047__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048{
2049 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2050
2051 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2052 t->successfulSpeculations += s->successfulSpeculations;
2053 t->hardFailedSpeculations += s->hardFailedSpeculations;
2054 t->softFailedSpeculations += s->softFailedSpeculations;
2055 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2056 t->lemmingYields += s->lemmingYields;
2057}
2058
2059static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002060__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002061{
2062 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2063
2064 __kmp_acquire_bootstrap_lock( &chain_lock );
2065
2066 __kmp_add_stats( &destroyedStats, lck );
2067 __kmp_forget_lock( lck );
2068
2069 __kmp_release_bootstrap_lock( &chain_lock );
2070}
2071
2072static float
2073percent (kmp_uint32 count, kmp_uint32 total)
2074{
2075 return (total == 0) ? 0.0: (100.0 * count)/total;
2076}
2077
2078static
2079FILE * __kmp_open_stats_file()
2080{
2081 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2082 return stdout;
2083
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002084 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002085 char buffer[buffLen];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002086 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002087 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002088 FILE * result = fopen(&buffer[0], "w");
2089
2090 // Maybe we should issue a warning here...
2091 return result ? result : stdout;
2092}
2093
2094void
2095__kmp_print_speculative_stats()
2096{
2097 if (__kmp_user_lock_kind != lk_adaptive)
2098 return;
2099
2100 FILE * statsFile = __kmp_open_stats_file();
2101
2102 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002103 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002104
2105 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2106 __kmp_add_stats( &total, lck );
2107 }
2108 kmp_adaptive_lock_statistics_t *t = &total;
2109 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2110 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2111 t->softFailedSpeculations;
2112
2113 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2114 fprintf ( statsFile, " Lock parameters: \n"
2115 " max_soft_retries : %10d\n"
2116 " max_badness : %10d\n",
2117 __kmp_adaptive_backoff_params.max_soft_retries,
2118 __kmp_adaptive_backoff_params.max_badness);
2119 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2120 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2121 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2122 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2123 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2124 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2125 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2126
2127 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2128 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2129 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2130 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2131 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2132 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2133 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2134
2135 if (statsFile != stdout)
2136 fclose( statsFile );
2137}
2138
2139# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2140#else
2141# define KMP_INC_STAT(lck,stat)
2142
2143#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2144
2145static inline bool
2146__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2147{
2148 // It is enough to check that the head_id is zero.
2149 // We don't also need to check the tail.
2150 bool res = lck->lk.head_id == 0;
2151
2152 // We need a fence here, since we must ensure that no memory operations
2153 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002154#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002155 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002156#else
2157 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002158#endif
2159
2160 return res;
2161}
2162
2163// Functions for manipulating the badness
2164static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002165__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002166{
2167 // Reset the badness to zero so we eagerly try to speculate again
2168 lck->lk.adaptive.badness = 0;
2169 KMP_INC_STAT(lck,successfulSpeculations);
2170}
2171
2172// Create a bit mask with one more set bit.
2173static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002174__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002175{
2176 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2177 if ( newBadness > lck->lk.adaptive.max_badness) {
2178 return;
2179 } else {
2180 lck->lk.adaptive.badness = newBadness;
2181 }
2182}
2183
2184// Check whether speculation should be attempted.
2185static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002186__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002187{
2188 kmp_uint32 badness = lck->lk.adaptive.badness;
2189 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2190 int res = (attempts & badness) == 0;
2191 return res;
2192}
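//
// Worked illustration of the badness/attempts interplay (illustrative only, not
// compiled in): each failed speculation shifts one more set bit into badness
// (0, 1, 3, 7, ...), and speculation is retried only on attempts whose low bits
// are all clear, i.e. on every (badness+1)-th acquire attempt.
//
#if 0
#include <stdio.h>
int main( void )
{
    unsigned badness = 7;      // after three consecutive failures: ((0<<1|1)<<1|1)<<1|1
    for ( unsigned attempts = 0; attempts < 17; ++attempts ) {
        // Same test as __kmp_should_speculate(): with badness == 7, only attempts
        // 0, 8 and 16 try the speculative path; the rest go straight to the real lock.
        printf( "attempt %2u -> %s\n", attempts,
                ( attempts & badness ) == 0 ? "speculate" : "skip" );
    }
    return 0;
}
#endif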
2193
2194// Attempt to acquire only the speculative lock.
2195// Does not back off to the non-speculative lock.
2196//
2197static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002198__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002199{
2200 int retries = lck->lk.adaptive.max_soft_retries;
2201
2202 // We don't explicitly count the start of speculation, rather we record
2203 // the results (success, hard fail, soft fail). The sum of all of those
2204 // is the total number of times we started speculation since all
2205 // speculations must end one of those ways.
2206 do
2207 {
2208 kmp_uint32 status = _xbegin();
2209 // Switch this in to disable actual speculation but exercise
2210 // at least some of the rest of the code. Useful for debugging...
2211 // kmp_uint32 status = _XABORT_NESTED;
2212
2213 if (status == _XBEGIN_STARTED )
2214 { /* We have successfully started speculation
2215 * Check that no-one acquired the lock for real between when we last looked
2216 * and now. This also gets the lock cache line into our read-set,
2217 * which we need so that we'll abort if anyone later claims it for real.
2218 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002219 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 {
2221 // Lock is now visibly acquired, so someone beat us to it.
2222 // Abort the transaction so we'll restart from _xbegin with the
2223 // failure status.
2224 _xabort(0x01)
2225 KMP_ASSERT2( 0, "should not get here" );
2226 }
2227 return 1; // Lock has been acquired (speculatively)
2228 } else {
2229 // We have aborted, update the statistics
2230 if ( status & SOFT_ABORT_MASK)
2231 {
2232 KMP_INC_STAT(lck,softFailedSpeculations);
2233 // and loop round to retry.
2234 }
2235 else
2236 {
2237 KMP_INC_STAT(lck,hardFailedSpeculations);
2238 // Give up if we had a hard failure.
2239 break;
2240 }
2241 }
2242 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2243
2244 // Either we had a hard failure or we didn't succeed softly after
2245    // the full set of attempts, so increase the badness (backing off from speculation).
2246 __kmp_step_badness( lck );
2247 return 0;
2248}
2249
2250// Attempt to acquire the speculative lock, or back off to the non-speculative one
2251// if the speculative lock cannot be acquired.
2252// We can succeed speculatively, non-speculatively, or fail.
2253static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002254__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002255{
2256 // First try to acquire the lock speculatively
2257 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2258 return 1;
2259
2260 // Speculative acquisition failed, so try to acquire it non-speculatively.
2261 // Count the non-speculative acquire attempt
2262 lck->lk.adaptive.acquire_attempts++;
2263
2264 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002265 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266 {
2267 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2268 return 1; // Lock is acquired (non-speculatively)
2269 }
2270 else
2271 {
2272 return 0; // Failed to acquire the lock, it's already visibly locked.
2273 }
2274}
2275
2276static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002277__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002278{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002279 char const * const func = "omp_test_lock";
2280 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2281 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002282 }
2283
2284 int retval = __kmp_test_adaptive_lock( lck, gtid );
2285
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002286 if ( retval ) {
2287 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002288 }
2289 return retval;
2290}
2291
2292// Block until we can acquire a speculative, adaptive lock.
2293// We check whether we should be trying to speculate.
2294// If we should be, we check the real lock to see if it is free,
2295// and, if not, pause without attempting to acquire it until it is.
2296// Then we try the speculative acquire.
2297// This means that although we suffer from lemmings a little (
2298// because we can't acquire the lock speculatively until
2299// the queue of threads waiting has cleared), we don't get into a
2300// state where we can never acquire the lock speculatively (because we
2301// force the queue to clear by preventing new arrivals from entering the
2302// queue).
2303// This does mean that when we're trying to break lemmings, the lock
2304// is no longer fair. However OpenMP makes no guarantee that its
2305// locks are fair, so this isn't a real problem.
2306static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002307__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308{
2309 if ( __kmp_should_speculate( lck, gtid ) )
2310 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002311 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002312 {
2313 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2314 return;
2315 // We tried speculation and failed, so give up.
2316 }
2317 else
2318 {
2319 // We can't try speculation until the lock is free, so we
2320            // pause here (without suspending on the queuing lock)
2321 // to allow it to drain, then try again.
2322 // All other threads will also see the same result for
2323            // __kmp_should_speculate(), so will be doing the same if they
2324 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002325 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002326 {
2327 KMP_INC_STAT(lck,lemmingYields);
2328 __kmp_yield (TRUE);
2329 }
2330
2331 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2332 return;
2333 }
2334 }
2335
2336 // Speculative acquisition failed, so acquire it non-speculatively.
2337 // Count the non-speculative acquire attempt
2338 lck->lk.adaptive.acquire_attempts++;
2339
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002340 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002341 // We have acquired the base lock, so count that.
2342 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2343}
2344
2345static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002346__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002348 char const * const func = "omp_set_lock";
2349 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2350 KMP_FATAL( LockIsUninitialized, func );
2351 }
2352 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2353 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 }
2355
2356 __kmp_acquire_adaptive_lock( lck, gtid );
2357
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002358 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002359}
2360
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002361static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002362__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002363{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002364 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365 { // If the lock doesn't look claimed we must be speculating.
2366 // (Or the user's code is buggy and they're releasing without locking;
2367 // if we had XTEST we'd be able to check that case...)
2368 _xend(); // Exit speculation
2369 __kmp_update_badness_after_success( lck );
2370 }
2371 else
2372 { // Since the lock *is* visibly locked we're not speculating,
2373 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002374 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002375 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002376 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377}
2378
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002379static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002380__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002381{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382 char const * const func = "omp_unset_lock";
2383 KMP_MB(); /* in case another processor initialized lock */
2384 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2385 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002386 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002387 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2388 KMP_FATAL( LockUnsettingFree, func );
2389 }
2390 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2391 KMP_FATAL( LockUnsettingSetByAnother, func );
2392 }
2393 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002394 __kmp_release_adaptive_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002395 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396}
2397
2398static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002399__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002400{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002401 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002402 lck->lk.adaptive.badness = 0;
2403 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2404 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2405 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2406#if KMP_DEBUG_ADAPTIVE_LOCKS
2407 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2408#endif
2409 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2410}
2411
2412static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002413__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002414{
2415 __kmp_init_adaptive_lock( lck );
2416}
2417
2418static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002419__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002420{
2421#if KMP_DEBUG_ADAPTIVE_LOCKS
2422 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2423#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002424 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002425 // Nothing needed for the speculative part.
2426}
2427
2428static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002429__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002430{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002431 char const * const func = "omp_destroy_lock";
2432 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2433 KMP_FATAL( LockIsUninitialized, func );
2434 }
2435 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2436 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002437 }
2438 __kmp_destroy_adaptive_lock( lck );
2439}
2440
2441
2442#endif // KMP_USE_ADAPTIVE_LOCKS
2443
2444
2445/* ------------------------------------------------------------------------ */
2446/* DRDPA ticket locks */
2447/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2448
2449static kmp_int32
2450__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2451{
2452 return TCR_4( lck->lk.owner_id ) - 1;
2453}
2454
2455static inline bool
2456__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2457{
2458 return lck->lk.depth_locked != -1;
2459}
2460
2461__forceinline static void
2462__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2463{
2464 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2465 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2466 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2467 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2468 TCR_PTR(lck->lk.polls); // volatile load
2469
2470#ifdef USE_LOCK_PROFILE
2471 if (TCR_8(polls[ticket & mask].poll) != ticket)
2472 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2473 /* else __kmp_printf( "." );*/
2474#endif /* USE_LOCK_PROFILE */
2475
2476 //
2477 // Now spin-wait, but reload the polls pointer and mask, in case the
2478 // polling area has been reconfigured. Unless it is reconfigured, the
2479 // reloads stay in L1 cache and are cheap.
2480 //
2481 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2482 //
2483 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2484 // and poll to be re-read every spin iteration.
2485 //
2486 kmp_uint32 spins;
2487
2488 KMP_FSYNC_PREPARE(lck);
2489 KMP_INIT_YIELD(spins);
2490 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002491 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002492 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 // CPU Pause is in the macros for yield.
2494 //
2495 KMP_YIELD(TCR_4(__kmp_nth)
2496 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2497 KMP_YIELD_SPIN(spins);
2498
2499 // Re-read the mask and the poll pointer from the lock structure.
2500 //
2501 // Make certain that "mask" is read before "polls" !!!
2502 //
2503        // If another thread reconfigures the polling area and updates these
2504        // values, and we then read the new value of mask but the old polls
2505 // pointer, we could access memory beyond the end of the old polling
2506 // area.
2507 //
2508 mask = TCR_8(lck->lk.mask); // volatile load
2509 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2510 TCR_PTR(lck->lk.polls); // volatile load
2511 }
2512
2513 //
2514 // Critical section starts here
2515 //
2516 KMP_FSYNC_ACQUIRED(lck);
2517 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2518 ticket, lck));
2519 lck->lk.now_serving = ticket; // non-volatile store
2520
2521 //
2522 // Deallocate a garbage polling area if we know that we are the last
2523 // thread that could possibly access it.
2524 //
2525 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2526 // ticket.
2527 //
2528 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2529 __kmp_free((void *)lck->lk.old_polls);
2530 lck->lk.old_polls = NULL;
2531 lck->lk.cleanup_ticket = 0;
2532 }
2533
2534 //
2535 // Check to see if we should reconfigure the polling area.
2536 // If there is still a garbage polling area to be deallocated from a
2537 // previous reconfiguration, let a later thread reconfigure it.
2538 //
2539 if (lck->lk.old_polls == NULL) {
2540 bool reconfigure = false;
2541 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2542 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2543
2544 if (TCR_4(__kmp_nth)
2545 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2546 //
2547 // We are in oversubscription mode. Contract the polling area
2548 // down to a single location, if that hasn't been done already.
2549 //
2550 if (num_polls > 1) {
2551 reconfigure = true;
2552 num_polls = TCR_4(lck->lk.num_polls);
2553 mask = 0;
2554 num_polls = 1;
2555 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2556 __kmp_allocate(num_polls * sizeof(*polls));
2557 polls[0].poll = ticket;
2558 }
2559 }
2560 else {
2561 //
2562 // We are in under/fully subscribed mode. Check the number of
2563 // threads waiting on the lock. The size of the polling area
2564 // should be at least the number of threads waiting.
2565 //
2566 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2567 if (num_waiting > num_polls) {
2568 kmp_uint32 old_num_polls = num_polls;
2569 reconfigure = true;
2570 do {
2571 mask = (mask << 1) | 1;
2572 num_polls *= 2;
2573 } while (num_polls <= num_waiting);
2574
2575 //
2576 // Allocate the new polling area, and copy the relevant portion
2577 // of the old polling area to the new area. __kmp_allocate()
2578 // zeroes the memory it allocates, and most of the old area is
2579 // just zero padding, so we only copy the release counters.
2580 //
2581 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2582 __kmp_allocate(num_polls * sizeof(*polls));
2583 kmp_uint32 i;
2584 for (i = 0; i < old_num_polls; i++) {
2585 polls[i].poll = old_polls[i].poll;
2586 }
2587 }
2588 }
2589
2590 if (reconfigure) {
2591 //
2592 // Now write the updated fields back to the lock structure.
2593 //
2594 // Make certain that "polls" is written before "mask" !!!
2595 //
2596 // If another thread picks up the new value of mask and the old
2597 // polls pointer , it could access memory beyond the end of the
2598 // old polling area.
2599 //
2600 // On x86, we need memory fences.
2601 //
2602 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2603 ticket, lck, num_polls));
2604
2605 lck->lk.old_polls = old_polls; // non-volatile store
2606 lck->lk.polls = polls; // volatile store
2607
2608 KMP_MB();
2609
2610 lck->lk.num_polls = num_polls; // non-volatile store
2611 lck->lk.mask = mask; // volatile store
2612
2613 KMP_MB();
2614
2615 //
2616 // Only after the new polling area and mask have been flushed
2617 // to main memory can we update the cleanup ticket field.
2618 //
2619 // volatile load / non-volatile store
2620 //
2621 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2622 }
2623 }
2624}
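//
// A stripped-down sketch of the distributed-polling idea used above (illustrative
// only, not compiled in): a fixed, zero-initialized polling array with no dynamic
// reconfiguration. Each ticket holder spins on its own padded slot, and release
// stores the next ticket into exactly the slot the next waiter is watching.
//
#if 0
#include <atomic>
struct mini_drdpa_slot {
    std::atomic<unsigned long> poll;
    char pad[64 - sizeof(std::atomic<unsigned long>)];   // keep each slot on its own cache line
};
struct mini_drdpa_lock {
    std::atomic<unsigned long> next_ticket;              // ticket dispenser
    unsigned long              now_serving;              // written only by the current owner
    mini_drdpa_slot            polls[8];                 // size must be a power of two
};
static void mini_drdpa_acquire( mini_drdpa_lock *l )
{
    unsigned long ticket = l->next_ticket.fetch_add( 1 );
    while ( l->polls[ticket & 7].poll.load( std::memory_order_acquire ) < ticket )
        ;                                                // spin on a slot no other waiter owns
    l->now_serving = ticket;                             // critical section starts here
}
static void mini_drdpa_release( mini_drdpa_lock *l )
{
    unsigned long next = l->now_serving + 1;
    l->polls[next & 7].poll.store( next, std::memory_order_release );
}
#endif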
2625
2626void
2627__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2628{
2629 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2630}
2631
2632static void
2633__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2634{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002635 char const * const func = "omp_set_lock";
2636 if ( lck->lk.initialized != lck ) {
2637 KMP_FATAL( LockIsUninitialized, func );
2638 }
2639 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2640 KMP_FATAL( LockNestableUsedAsSimple, func );
2641 }
2642 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2643 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002644 }
2645
2646 __kmp_acquire_drdpa_lock( lck, gtid );
2647
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002648 lck->lk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002649}
2650
2651int
2652__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2653{
2654 //
2655 // First get a ticket, then read the polls pointer and the mask.
2656 // The polls pointer must be read before the mask!!! (See above)
2657 //
2658 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2659 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2660 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2661 TCR_PTR(lck->lk.polls); // volatile load
2662 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2663 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2664 kmp_uint64 next_ticket = ticket + 1;
2665 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2666 ticket, next_ticket)) {
2667 KMP_FSYNC_ACQUIRED(lck);
2668 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2669 ticket, lck));
2670 lck->lk.now_serving = ticket; // non-volatile store
2671
2672 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002673 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002674 // we would want to reconfigure the polling area. We might
2675 // have the cleanup ticket value (which says that it is now
2676 // safe to deallocate old_polls), but we'll let a later thread
2677 // which calls __kmp_acquire_lock do that - this routine
2678 // isn't supposed to block, and we would risk blocks if we
2679 // called __kmp_free() to do the deallocation.
2680 //
2681 return TRUE;
2682 }
2683 }
2684 return FALSE;
2685}
2686
2687static int
2688__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2689{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002690 char const * const func = "omp_test_lock";
2691 if ( lck->lk.initialized != lck ) {
2692 KMP_FATAL( LockIsUninitialized, func );
2693 }
2694 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2695 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002696 }
2697
2698 int retval = __kmp_test_drdpa_lock( lck, gtid );
2699
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002700 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002701 lck->lk.owner_id = gtid + 1;
2702 }
2703 return retval;
2704}
2705
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002706int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002707__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2708{
2709 //
2710 // Read the ticket value from the lock data struct, then the polls
2711 // pointer and the mask. The polls pointer must be read before the
2712 // mask!!! (See above)
2713 //
2714 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2715 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2716 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2717 TCR_PTR(lck->lk.polls); // volatile load
2718 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2719 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2720 ticket - 1, lck));
2721 KMP_FSYNC_RELEASING(lck);
2722 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002723 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002724}
2725
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002726static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002727__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2728{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002729 char const * const func = "omp_unset_lock";
2730 KMP_MB(); /* in case another processor initialized lock */
2731 if ( lck->lk.initialized != lck ) {
2732 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002733 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002734 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2735 KMP_FATAL( LockNestableUsedAsSimple, func );
2736 }
2737 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2738 KMP_FATAL( LockUnsettingFree, func );
2739 }
2740 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2741 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2742 KMP_FATAL( LockUnsettingSetByAnother, func );
2743 }
2744 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002745 return __kmp_release_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002746}
2747
2748void
2749__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2750{
2751 lck->lk.location = NULL;
2752 lck->lk.mask = 0;
2753 lck->lk.num_polls = 1;
2754 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2755 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2756 lck->lk.cleanup_ticket = 0;
2757 lck->lk.old_polls = NULL;
2758 lck->lk.next_ticket = 0;
2759 lck->lk.now_serving = 0;
2760 lck->lk.owner_id = 0; // no thread owns the lock.
2761 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2762 lck->lk.initialized = lck;
2763
2764 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2765}
2766
2767static void
2768__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2769{
2770 __kmp_init_drdpa_lock( lck );
2771}
2772
2773void
2774__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2775{
2776 lck->lk.initialized = NULL;
2777 lck->lk.location = NULL;
2778 if (lck->lk.polls != NULL) {
2779 __kmp_free((void *)lck->lk.polls);
2780 lck->lk.polls = NULL;
2781 }
2782 if (lck->lk.old_polls != NULL) {
2783 __kmp_free((void *)lck->lk.old_polls);
2784 lck->lk.old_polls = NULL;
2785 }
2786 lck->lk.mask = 0;
2787 lck->lk.num_polls = 0;
2788 lck->lk.cleanup_ticket = 0;
2789 lck->lk.next_ticket = 0;
2790 lck->lk.now_serving = 0;
2791 lck->lk.owner_id = 0;
2792 lck->lk.depth_locked = -1;
2793}
2794
2795static void
2796__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2797{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002798 char const * const func = "omp_destroy_lock";
2799 if ( lck->lk.initialized != lck ) {
2800 KMP_FATAL( LockIsUninitialized, func );
2801 }
2802 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2803 KMP_FATAL( LockNestableUsedAsSimple, func );
2804 }
2805 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2806 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002807 }
2808 __kmp_destroy_drdpa_lock( lck );
2809}
2810
2811
2812//
2813// nested drdpa ticket locks
2814//
2815
2816void
2817__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2818{
2819 KMP_DEBUG_ASSERT( gtid >= 0 );
2820
2821 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2822 lck->lk.depth_locked += 1;
2823 }
2824 else {
2825 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2826 KMP_MB();
2827 lck->lk.depth_locked = 1;
2828 KMP_MB();
2829 lck->lk.owner_id = gtid + 1;
2830 }
2831}
2832
2833static void
2834__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2835{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002836 char const * const func = "omp_set_nest_lock";
2837 if ( lck->lk.initialized != lck ) {
2838 KMP_FATAL( LockIsUninitialized, func );
2839 }
2840 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2841 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002842 }
2843 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2844}
2845
2846int
2847__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2848{
2849 int retval;
2850
2851 KMP_DEBUG_ASSERT( gtid >= 0 );
2852
2853 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2854 retval = ++lck->lk.depth_locked;
2855 }
2856 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2857 retval = 0;
2858 }
2859 else {
2860 KMP_MB();
2861 retval = lck->lk.depth_locked = 1;
2862 KMP_MB();
2863 lck->lk.owner_id = gtid + 1;
2864 }
2865 return retval;
2866}
2867
2868static int
2869__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2870{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002871 char const * const func = "omp_test_nest_lock";
2872 if ( lck->lk.initialized != lck ) {
2873 KMP_FATAL( LockIsUninitialized, func );
2874 }
2875 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2876 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002877 }
2878 return __kmp_test_nested_drdpa_lock( lck, gtid );
2879}
2880
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002881int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002882__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2883{
2884 KMP_DEBUG_ASSERT( gtid >= 0 );
2885
2886 KMP_MB();
2887 if ( --(lck->lk.depth_locked) == 0 ) {
2888 KMP_MB();
2889 lck->lk.owner_id = 0;
2890 __kmp_release_drdpa_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002891 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002892 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002893 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002894}
2895
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002896static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2898{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002899 char const * const func = "omp_unset_nest_lock";
2900 KMP_MB(); /* in case another processor initialized lock */
2901 if ( lck->lk.initialized != lck ) {
2902 KMP_FATAL( LockIsUninitialized, func );
2903 }
2904 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2905 KMP_FATAL( LockSimpleUsedAsNestable, func );
2906 }
2907 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2908 KMP_FATAL( LockUnsettingFree, func );
2909 }
2910 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2911 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002912 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002913 return __kmp_release_nested_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914}
2915
2916void
2917__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2918{
2919 __kmp_init_drdpa_lock( lck );
2920 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2921}
2922
2923static void
2924__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2925{
2926 __kmp_init_nested_drdpa_lock( lck );
2927}
2928
2929void
2930__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2931{
2932 __kmp_destroy_drdpa_lock( lck );
2933 lck->lk.depth_locked = 0;
2934}
2935
2936static void
2937__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2938{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002939 char const * const func = "omp_destroy_nest_lock";
2940 if ( lck->lk.initialized != lck ) {
2941 KMP_FATAL( LockIsUninitialized, func );
2942 }
2943 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2944 KMP_FATAL( LockSimpleUsedAsNestable, func );
2945 }
2946 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2947 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002948 }
2949 __kmp_destroy_nested_drdpa_lock( lck );
2950}
2951
2952
2953//
2954// access functions to fields which don't exist for all lock kinds.
2955//
2956
2957static int
2958__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2959{
2960 return lck == lck->lk.initialized;
2961}
2962
2963static const ident_t *
2964__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2965{
2966 return lck->lk.location;
2967}
2968
2969static void
2970__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2971{
2972 lck->lk.location = loc;
2973}
2974
2975static kmp_lock_flags_t
2976__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2977{
2978 return lck->lk.flags;
2979}
2980
2981static void
2982__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2983{
2984 lck->lk.flags = flags;
2985}
2986
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002987#if KMP_USE_DYNAMIC_LOCK
2988
2989// Definitions of lock hints.
2990# ifndef __OMP_H
2991typedef enum kmp_lock_hint_t {
2992 kmp_lock_hint_none = 0,
2993 kmp_lock_hint_contended,
2994 kmp_lock_hint_uncontended,
2995 kmp_lock_hint_nonspeculative,
2996 kmp_lock_hint_speculative,
2997 kmp_lock_hint_adaptive,
2998} kmp_lock_hint_t;
2999# endif
3000
3001// Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word.
3002#define expand_init_lock(l, a) \
3003static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
3004 *lck = DYNA_LOCK_FREE(l); \
3005 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
3006}
3007FOREACH_D_LOCK(expand_init_lock, 0)
3008#undef expand_init_lock
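//
// Illustrative decoding of the tagged lock word written by the initializers above
// (not compiled in). A direct lock keeps its (odd) tag in the low bits of the word
// itself, while an indirect lock stores an even value there (see the "must be even"
// comment in __kmp_allocate_indirect_lock below), so the low bit alone selects the
// dispatch path. The helper names are hypothetical.
//
#if 0
static inline int
example_lock_word_is_direct( kmp_dyna_lock_t word )
{
    return ( word & 1 ) != 0;          // odd => direct lock, even => indirect lock
}
static inline unsigned
example_lock_word_tag( kmp_dyna_lock_t word )
{
    return (unsigned)( word & 0xFF );  // the tag written into the low 8 bits by init_*_lock()
}
#endif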
3009
3010#if DYNA_HAS_HLE
3011
3012// HLE lock functions - imported from the testbed runtime.
3013#if KMP_MIC
3014# define machine_pause() _mm_delay_32(10) // TODO: find the right argument
3015#else
3016# define machine_pause() _mm_pause()
3017#endif
3018#define HLE_ACQUIRE ".byte 0xf2;"
3019#define HLE_RELEASE ".byte 0xf3;"
3020
3021static inline kmp_uint32
3022swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3023{
3024 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3025 : "+r"(v), "+m"(*p)
3026 :
3027 : "memory");
3028 return v;
3029}
3030
3031static void
3032__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3033{
3034 *lck = 0;
3035}
3036
3037static void
3038__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3039{
3040 // Use gtid for DYNA_LOCK_BUSY if necessary
3041 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
3042 int delay = 1;
3043 do {
3044 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
3045 for (int i = delay; i != 0; --i)
3046 machine_pause();
3047 delay = ((delay << 1) | 1) & 7;
3048 }
3049 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
3050 }
3051}
3052
3053static void
3054__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3055{
3056 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3057}
3058
3059static void
3060__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3061{
3062 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3063 : "=m"(*lck)
3064 : "r"(DYNA_LOCK_FREE(hle))
3065 : "memory");
3066}
3067
3068static void
3069__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3070{
3071 __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3072}
3073
3074static int
3075__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3076{
3077 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3078}
3079
3080static int
3081__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3082{
3083 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3084}
3085
3086#endif // DYNA_HAS_HLE
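//
// The acquire loop above is a test-and-test-and-set lock with a small, bounded
// exponential pause, elided via the HLE prefixes (0xF2 = XACQUIRE, 0xF3 = XRELEASE).
// A portable sketch of the same backoff pattern in plain C++11 atomics follows;
// it is illustrative only and not compiled in.
//
#if 0
#include <atomic>
static void
example_tts_acquire( std::atomic<unsigned> *lock_word )
{
    if ( lock_word->exchange( 1 ) != 0 ) {                    // fast path failed
        int delay = 1;
        do {
            while ( lock_word->load( std::memory_order_relaxed ) != 0 ) {
                for ( int i = delay; i != 0; --i )
                    machine_pause();                          // same pause as above
                delay = ( ( delay << 1 ) | 1 ) & 7;           // 1, 3, 7, 7, ... pauses
            }
        } while ( lock_word->exchange( 1 ) != 0 );            // retry the swap once it looks free
    }
}
static void
example_tts_release( std::atomic<unsigned> *lock_word )
{
    lock_word->store( 0, std::memory_order_release );
}
#endif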
3087
3088// Entry functions for indirect locks (first element of direct_*_ops[]).
3089static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3090static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3091static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3092static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3093static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3094static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3095static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3096static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3097
3098//
3099// Jump tables for the direct lock functions.
3100// Only the odd entries are filled in; this avoids the need to shift out the low bit.
3101//
3102#define expand_func0(l, op) 0,op##_##l##_##lock,
3103void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3104 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3105
3106#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3107void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3108 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3109
3110// Differentiates *lock and *lock_with_checks.
3111#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3112#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3113static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3114 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3115 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3116static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3117 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3118 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3119
3120#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3121#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3122static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3123 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3124 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3125
3126// Exposes only one set of jump tables (*lock or *lock_with_checks).
3127void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3128void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3129int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
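//
// Sketch of how one row of the tables above becomes the exposed set of function
// pointers (illustrative only, not compiled in). The real selection happens during
// runtime initialization; whether the checked row is wanted is passed in here as a
// plain flag, which is an assumption of this sketch.
//
#if 0
static void
example_select_direct_tables( int with_checks )
{
    int row = with_checks ? 1 : 0;               // row 0: *lock, row 1: *lock_with_checks
    __kmp_direct_set_ops   = direct_set_tab[row];
    __kmp_direct_unset_ops = direct_unset_tab[row];
    __kmp_direct_test_ops  = direct_test_tab[row];
}
#endif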
3130
3131//
3132// Jump tables for the indirect lock functions.
3133//
3134#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3135void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3136 = { FOREACH_I_LOCK(expand_func4, init) };
3137void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3138 = { FOREACH_I_LOCK(expand_func4, destroy) };
3139
3140// Differentiates *lock and *lock_with_checks.
3141#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3142#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3143static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3144 = { { FOREACH_I_LOCK(expand_func5, acquire) },
3145 { FOREACH_I_LOCK(expand_func5c, acquire) } };
3146static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3147 = { { FOREACH_I_LOCK(expand_func5, release) },
3148 { FOREACH_I_LOCK(expand_func5c, release) } };
3149
3150#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3151#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3152static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3153 = { { FOREACH_I_LOCK(expand_func6, test) },
3154 { FOREACH_I_LOCK(expand_func6c, test) } };
3155
3156// Exposes only one set of jump tables (*lock or *lock_with_checks).
3157void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3158void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3159int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3160
3161// Lock index table.
3162kmp_indirect_lock_t **__kmp_indirect_lock_table;
3163kmp_lock_index_t __kmp_indirect_lock_table_size;
3164kmp_lock_index_t __kmp_indirect_lock_table_next;
3165
3166// Size of indirect locks.
3167static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3168 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3169#if KMP_USE_ADAPTIVE_LOCKS
3170 sizeof(kmp_adaptive_lock_t),
3171#endif
3172 sizeof(kmp_drdpa_lock_t),
3173 sizeof(kmp_tas_lock_t),
3174#if DYNA_HAS_FUTEX
3175 sizeof(kmp_futex_lock_t),
3176#endif
3177 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3178 sizeof(kmp_drdpa_lock_t)
3179};
3180
3181// Jump tables for lock accessor/modifier.
3182void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3183void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3184const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3185kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3186
3187// Use different lock pools for different lock types.
3188static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3189
3190// Inserts the given lock ptr into the lock table and returns its index.
3191kmp_lock_index_t
3192__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3193{
3194 kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3195 // Check capacity and double the size if required
3196 if (next >= __kmp_indirect_lock_table_size) {
3197 kmp_lock_index_t i;
3198 kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3199 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3200 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3201        KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
3202        __kmp_free(old_table);
3203 __kmp_indirect_lock_table_size = 2*next;
3204 }
3205    // Insert lck into the table and return the index.
3206 __kmp_indirect_lock_table[next] = lck;
3207 __kmp_indirect_lock_table_next++;
3208 return next;
3209}
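//
// The table grows by doubling whenever it is full. There is no internal
// locking here; the only caller in this file, __kmp_allocate_indirect_lock(),
// invokes it while holding __kmp_global_lock, which serializes the resize.
//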
3210
3211// User lock allocator for dynamically dispatched locks.
3212kmp_indirect_lock_t *
3213__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3214{
3215 kmp_indirect_lock_t *lck;
3216 kmp_lock_index_t idx;
3217
3218 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3219
3220 if (__kmp_indirect_lock_pool[tag] != NULL) {
3221 lck = __kmp_indirect_lock_pool[tag];
3222 if (OMP_LOCK_T_SIZE < sizeof(void *))
3223 idx = lck->lock->pool.index;
3224 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3225 } else {
3226 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3227 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3228 if (OMP_LOCK_T_SIZE < sizeof(void *))
3229 idx = __kmp_insert_indirect_lock(lck);
3230 }
3231
3232 __kmp_release_lock(&__kmp_global_lock, gtid);
3233
3234 lck->type = tag;
3235
3236 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3237 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3238 } else {
3239 *((kmp_indirect_lock_t **)user_lock) = lck;
3240 }
3241
3242 return lck;
3243}
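//
// Illustrative summary only (restating the code above): the two encodings of
// the user's lock word are
//
//     if (OMP_LOCK_T_SIZE < sizeof(void *))
//         *(kmp_lock_index_t *)user_lock = idx << 1;   // even table index
//     else
//         *(kmp_indirect_lock_t **)user_lock = lck;    // direct pointer
//
// so a later lookup either extracts the (even) index and consults
// __kmp_indirect_lock_table, or simply dereferences the stored pointer.
//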
3244
3245// User lock lookup for dynamically dispatched locks.
3246static __forceinline
3247kmp_indirect_lock_t *
3248__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3249{
3250 if (__kmp_env_consistency_check) {
3251 kmp_indirect_lock_t *lck = NULL;
3252 if (user_lock == NULL) {
3253 KMP_FATAL(LockIsUninitialized, func);
3254 }
3255 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3256 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3257 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3258 KMP_FATAL(LockIsUninitialized, func);
3259 }
3260 lck = __kmp_indirect_lock_table[idx];
3261 } else {
3262 lck = *((kmp_indirect_lock_t **)user_lock);
3263 }
3264 if (lck == NULL) {
3265 KMP_FATAL(LockIsUninitialized, func);
3266 }
3267 return lck;
3268 } else {
3269 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3270 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3271 } else {
3272 return *((kmp_indirect_lock_t **)user_lock);
3273 }
3274 }
3275}
3276
3277static void
3278__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3279{
3280#if KMP_USE_ADAPTIVE_LOCKS
3281 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3282 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3283 seq = lockseq_queuing;
3284 }
3285#endif
3286 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3287 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3288 DYNA_I_LOCK_FUNC(l, init)(l->lock);
3289 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3290}
3291
3292static void
3293__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3294{
3295 kmp_uint32 gtid = __kmp_entry_gtid();
3296 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3297 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3298 kmp_indirect_locktag_t tag = l->type;
3299
3300 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3301
3302 // Use the base lock's space to keep the pool chain.
3303 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3304 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3305 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3306 }
3307 __kmp_indirect_lock_pool[tag] = l;
3308
3309 __kmp_release_lock(&__kmp_global_lock, gtid);
3310}
3311
3312static void
3313__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3314{
3315 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3316 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3317}
3318
3319static void
3320__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3321{
3322 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3323 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3324}
3325
3326static int
3327__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3328{
3329 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3330 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3331}
3332
3333static void
3334__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3335{
3336 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3337 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3338}
3339
3340static void
3341__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3342{
3343 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3344 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3345}
3346
3347static int
3348__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3349{
3350 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3351 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3352}
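//
// The *_with_checks wrappers above differ from the plain wrappers only in how
// they locate the lock: they go through __kmp_lookup_indirect_lock(), which can
// report LockIsUninitialized via KMP_FATAL, instead of the raw
// DYNA_LOOKUP_I_LOCK() fast path. Which set user code reaches is decided once,
// in __kmp_init_dynamic_user_locks() below.
//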
3353
3354kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3355
3356// Initialize a hinted lock.
3357void
3358__kmp_init_lock_hinted(void **lock, int hint)
3359{
3360 kmp_dyna_lockseq_t seq;
3361 switch (hint) {
3362 case kmp_lock_hint_uncontended:
3363 seq = lockseq_tas;
3364 break;
3365 case kmp_lock_hint_speculative:
3366#if DYNA_HAS_HLE
3367 seq = lockseq_hle;
3368#else
3369 seq = lockseq_tas;
3370#endif
3371 break;
3372 case kmp_lock_hint_adaptive:
3373#if KMP_USE_ADAPTIVE_LOCKS
3374 seq = lockseq_adaptive;
3375#else
3376 seq = lockseq_queuing;
3377#endif
3378 break;
3379 // Defaults to queuing locks.
3380 case kmp_lock_hint_contended:
3381 case kmp_lock_hint_nonspeculative:
3382 default:
3383 seq = lockseq_queuing;
3384 break;
3385 }
3386 if (DYNA_IS_D_LOCK(seq)) {
3387 DYNA_INIT_D_LOCK(lock, seq);
3388#if USE_ITT_BUILD
3389 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3390#endif
3391 } else {
3392 DYNA_INIT_I_LOCK(lock, seq);
3393#if USE_ITT_BUILD
3394 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3395 __kmp_itt_lock_creating(ilk->lock, NULL);
3396#endif
3397 }
3398}
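//
// A minimal usage sketch (hypothetical caller, not part of this file): a lock
// initialized with the speculative hint would be set up roughly as
//
//     omp_lock_t lck;
//     __kmp_init_lock_hinted((void **)&lck, kmp_lock_hint_speculative);
//     // selects lockseq_hle when DYNA_HAS_HLE is set, lockseq_tas otherwise
//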
3399
3400// This is used only in kmp_error.c when consistency checking is on.
3401kmp_int32
3402__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3403{
3404 switch (seq) {
3405 case lockseq_tas:
3406 case lockseq_nested_tas:
3407 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3408#if DYNA_HAS_FUTEX
3409 case lockseq_futex:
3410 case lockseq_nested_futex:
3411 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3412#endif
3413 case lockseq_ticket:
3414 case lockseq_nested_ticket:
3415 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3416 case lockseq_queuing:
3417 case lockseq_nested_queuing:
3418#if KMP_USE_ADAPTIVE_LOCKS
3419 case lockseq_adaptive:
3420 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3421#endif
3422 case lockseq_drdpa:
3423 case lockseq_nested_drdpa:
3424 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3425 default:
3426 return 0;
3427 }
3428}
3429
3430// The value initialized from KMP_LOCK_KIND needs to be translated to its
3431// nested version.
3432void
3433__kmp_init_nest_lock_hinted(void **lock, int hint)
3434{
3435 kmp_dyna_lockseq_t seq;
3436 switch (hint) {
3437 case kmp_lock_hint_uncontended:
3438 seq = lockseq_nested_tas;
3439 break;
3440 // Defaults to queuing locks.
3441 case kmp_lock_hint_contended:
3442 case kmp_lock_hint_nonspeculative:
3443 default:
3444 seq = lockseq_nested_queuing;
3445 break;
3446 }
3447 DYNA_INIT_I_LOCK(lock, seq);
3448#if USE_ITT_BUILD
3449 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3450 __kmp_itt_lock_creating(ilk->lock, NULL);
3451#endif
3452}
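//
// Unlike __kmp_init_lock_hinted(), the nested variant always goes through
// DYNA_INIT_I_LOCK: the nested sequences (lockseq_nested_tas,
// lockseq_nested_queuing) are dispatched as indirect locks in this scheme.
//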
3453
3454// Initializes the lock table for indirect locks.
3455static void
3456__kmp_init_indirect_lock_table()
3457{
3458 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3459 __kmp_indirect_lock_table_size = 1024;
3460 __kmp_indirect_lock_table_next = 0;
3461}
3462
3463#if KMP_USE_ADAPTIVE_LOCKS
3464# define init_lock_func(table, expand) { \
3465 table[locktag_ticket] = expand(ticket); \
3466 table[locktag_queuing] = expand(queuing); \
3467 table[locktag_adaptive] = expand(queuing); \
3468 table[locktag_drdpa] = expand(drdpa); \
3469 table[locktag_nested_ticket] = expand(ticket); \
3470 table[locktag_nested_queuing] = expand(queuing); \
3471 table[locktag_nested_drdpa] = expand(drdpa); \
3472}
3473#else
3474# define init_lock_func(table, expand) { \
3475 table[locktag_ticket] = expand(ticket); \
3476 table[locktag_queuing] = expand(queuing); \
3477 table[locktag_drdpa] = expand(drdpa); \
3478 table[locktag_nested_ticket] = expand(ticket); \
3479 table[locktag_nested_queuing] = expand(queuing); \
3480 table[locktag_nested_drdpa] = expand(drdpa); \
3481}
3482#endif // KMP_USE_ADAPTIVE_LOCKS
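//
// Note that locktag_adaptive is deliberately mapped to the queuing accessors:
// the lk_adaptive case in __kmp_set_user_lock_vptrs() below likewise binds the
// __kmp_*_queuing_lock_location/flags routines, so the adaptive lock shares the
// queuing lock's accessor interface.
//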
3483
3484// Initializes data for dynamic user locks.
3485void
3486__kmp_init_dynamic_user_locks()
3487{
3488 // Initialize jump table location
3489 int offset = (__kmp_env_consistency_check)? 1: 0;
3490 __kmp_direct_set_ops = direct_set_tab[offset];
3491 __kmp_direct_unset_ops = direct_unset_tab[offset];
3492 __kmp_direct_test_ops = direct_test_tab[offset];
3493 __kmp_indirect_set_ops = indirect_set_tab[offset];
3494 __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3495 __kmp_indirect_test_ops = indirect_test_tab[offset];
3496 __kmp_init_indirect_lock_table();
3497
3498 // Initialize lock accessor/modifier
3499    // Could have used designated initializers, but -TP /Qstd=c99 did not work with icl.exe.
3500#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3501 init_lock_func(__kmp_indirect_set_location, expand_func);
3502#undef expand_func
3503#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3504 init_lock_func(__kmp_indirect_set_flags, expand_func);
3505#undef expand_func
3506#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3507 init_lock_func(__kmp_indirect_get_location, expand_func);
3508#undef expand_func
3509#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3510 init_lock_func(__kmp_indirect_get_flags, expand_func);
3511#undef expand_func
3512
3513 __kmp_init_user_locks = TRUE;
3514}
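//
// After this point the dynamic dispatch is fully wired up: when consistency
// checking is enabled (offset == 1) every direct and indirect operation is
// routed through the *_with_checks tables, and the accessor/modifier tables
// make location/flags queries work for the indirect lock types.
//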
3515
3516// Clean up the lock table.
3517void
3518__kmp_cleanup_indirect_user_locks()
3519{
3520 kmp_lock_index_t i;
3521 int k;
3522
3523 // Clean up locks in the pools first (they were already destroyed before going into the pools).
3524 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3525 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3526 while (l != NULL) {
3527 kmp_indirect_lock_t *ll = l;
3528 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3529 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3530 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3531 }
3532 __kmp_free(ll->lock);
3533 __kmp_free(ll);
3534 }
3535 }
3536 // Clean up the remaining undestroyed locks.
3537 for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3538 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3539 if (l != NULL) {
3540 // Locks not destroyed explicitly need to be destroyed here.
3541 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3542 __kmp_free(l->lock);
3543 __kmp_free(l);
3544 }
3545 }
3546 // Free the table
3547 __kmp_free(__kmp_indirect_lock_table);
3548
3549 __kmp_init_user_locks = FALSE;
3550}
3551
3552enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3553int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3554
3555#else // KMP_USE_DYNAMIC_LOCK
3556
3557/* ------------------------------------------------------------------------ */
3558/* user locks
3559 *
3560 * They are implemented as a table of function pointers which are set to the
3561 * lock functions of the appropriate kind, once that has been determined.
3562 */
3563
3564enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3565
3566size_t __kmp_base_user_lock_size = 0;
3567size_t __kmp_user_lock_size = 0;
3568
3569kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3570void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3571
3572int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3573int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3574void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3575void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3576void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3577void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3578
3579int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3580int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3581void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3582void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3583
3584int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3585const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3586void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3587kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3588void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3589
3590void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3591{
3592 switch ( user_lock_kind ) {
3593 case lk_default:
3594 default:
3595 KMP_ASSERT( 0 );
3596
3597 case lk_tas: {
3598 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3599 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3600
3601 __kmp_get_user_lock_owner_ =
3602 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3603 ( &__kmp_get_tas_lock_owner );
3604
3605        if ( __kmp_env_consistency_check ) {
3606 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3607 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3608 }
3609 else {
3610 KMP_BIND_USER_LOCK(tas);
3611 KMP_BIND_NESTED_USER_LOCK(tas);
3612 }
3613
3614 __kmp_destroy_user_lock_ =
3615 ( void ( * )( kmp_user_lock_p ) )
3616 ( &__kmp_destroy_tas_lock );
3617
3618        __kmp_is_user_lock_initialized_ =
3619 ( int ( * )( kmp_user_lock_p ) ) NULL;
3620
3621 __kmp_get_user_lock_location_ =
3622 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3623
3624 __kmp_set_user_lock_location_ =
3625 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3626
3627 __kmp_get_user_lock_flags_ =
3628 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3629
3630 __kmp_set_user_lock_flags_ =
3631 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3632 }
3633 break;
3634
3635#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3636
3637 case lk_futex: {
3638 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3639 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3640
3641 __kmp_get_user_lock_owner_ =
3642 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3643 ( &__kmp_get_futex_lock_owner );
3644
3645        if ( __kmp_env_consistency_check ) {
3646 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3647 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3648 }
3649 else {
3650 KMP_BIND_USER_LOCK(futex);
3651 KMP_BIND_NESTED_USER_LOCK(futex);
3652 }
3653
3654 __kmp_destroy_user_lock_ =
3655 ( void ( * )( kmp_user_lock_p ) )
3656 ( &__kmp_destroy_futex_lock );
3657
3658        __kmp_is_user_lock_initialized_ =
3659 ( int ( * )( kmp_user_lock_p ) ) NULL;
3660
3661 __kmp_get_user_lock_location_ =
3662 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3663
3664 __kmp_set_user_lock_location_ =
3665 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3666
3667 __kmp_get_user_lock_flags_ =
3668 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3669
3670 __kmp_set_user_lock_flags_ =
3671 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3672 }
3673 break;
3674
3675#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3676
3677 case lk_ticket: {
3678 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3679 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3680
3681 __kmp_get_user_lock_owner_ =
3682 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3683 ( &__kmp_get_ticket_lock_owner );
3684
3685        if ( __kmp_env_consistency_check ) {
3686 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3687 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3688 }
3689 else {
3690 KMP_BIND_USER_LOCK(ticket);
3691 KMP_BIND_NESTED_USER_LOCK(ticket);
3692 }
3693
3694 __kmp_destroy_user_lock_ =
3695 ( void ( * )( kmp_user_lock_p ) )
3696 ( &__kmp_destroy_ticket_lock );
3697
3698        __kmp_is_user_lock_initialized_ =
3699 ( int ( * )( kmp_user_lock_p ) )
3700 ( &__kmp_is_ticket_lock_initialized );
3701
3702 __kmp_get_user_lock_location_ =
3703 ( const ident_t * ( * )( kmp_user_lock_p ) )
3704 ( &__kmp_get_ticket_lock_location );
3705
3706 __kmp_set_user_lock_location_ =
3707 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3708 ( &__kmp_set_ticket_lock_location );
3709
3710 __kmp_get_user_lock_flags_ =
3711 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3712 ( &__kmp_get_ticket_lock_flags );
3713
3714 __kmp_set_user_lock_flags_ =
3715 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3716 ( &__kmp_set_ticket_lock_flags );
3717 }
3718 break;
3719
3720 case lk_queuing: {
3721 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3722 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3723
3724 __kmp_get_user_lock_owner_ =
3725 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3726 ( &__kmp_get_queuing_lock_owner );
3727
3728        if ( __kmp_env_consistency_check ) {
3729 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3730 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3731 }
3732 else {
3733 KMP_BIND_USER_LOCK(queuing);
3734 KMP_BIND_NESTED_USER_LOCK(queuing);
3735 }
3736
3737 __kmp_destroy_user_lock_ =
3738 ( void ( * )( kmp_user_lock_p ) )
3739 ( &__kmp_destroy_queuing_lock );
3740
3741        __kmp_is_user_lock_initialized_ =
3742 ( int ( * )( kmp_user_lock_p ) )
3743 ( &__kmp_is_queuing_lock_initialized );
3744
3745 __kmp_get_user_lock_location_ =
3746 ( const ident_t * ( * )( kmp_user_lock_p ) )
3747 ( &__kmp_get_queuing_lock_location );
3748
3749 __kmp_set_user_lock_location_ =
3750 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3751 ( &__kmp_set_queuing_lock_location );
3752
3753 __kmp_get_user_lock_flags_ =
3754 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3755 ( &__kmp_get_queuing_lock_flags );
3756
3757 __kmp_set_user_lock_flags_ =
3758 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3759 ( &__kmp_set_queuing_lock_flags );
3760 }
3761 break;
3762
3763#if KMP_USE_ADAPTIVE_LOCKS
3764 case lk_adaptive: {
3765        __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3766 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3767
3768 __kmp_get_user_lock_owner_ =
3769 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3770 ( &__kmp_get_queuing_lock_owner );
3771
3772        if ( __kmp_env_consistency_check ) {
3773 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3774 }
3775 else {
3776 KMP_BIND_USER_LOCK(adaptive);
3777 }
3778
3779 __kmp_destroy_user_lock_ =
3780 ( void ( * )( kmp_user_lock_p ) )
3781 ( &__kmp_destroy_adaptive_lock );
3782
3783 __kmp_is_user_lock_initialized_ =
3784 ( int ( * )( kmp_user_lock_p ) )
3785 ( &__kmp_is_queuing_lock_initialized );
3786
3787 __kmp_get_user_lock_location_ =
3788 ( const ident_t * ( * )( kmp_user_lock_p ) )
3789 ( &__kmp_get_queuing_lock_location );
3790
3791 __kmp_set_user_lock_location_ =
3792 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3793 ( &__kmp_set_queuing_lock_location );
3794
3795 __kmp_get_user_lock_flags_ =
3796 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3797 ( &__kmp_get_queuing_lock_flags );
3798
3799 __kmp_set_user_lock_flags_ =
3800 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3801 ( &__kmp_set_queuing_lock_flags );
3802
3803 }
3804 break;
3805#endif // KMP_USE_ADAPTIVE_LOCKS
3806
3807 case lk_drdpa: {
3808 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3809 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3810
3811 __kmp_get_user_lock_owner_ =
3812 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3813 ( &__kmp_get_drdpa_lock_owner );
3814
3815        if ( __kmp_env_consistency_check ) {
3816 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3817 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3818 }
3819 else {
3820 KMP_BIND_USER_LOCK(drdpa);
3821 KMP_BIND_NESTED_USER_LOCK(drdpa);
3822 }
3823
3824 __kmp_destroy_user_lock_ =
3825 ( void ( * )( kmp_user_lock_p ) )
3826 ( &__kmp_destroy_drdpa_lock );
3827
3828        __kmp_is_user_lock_initialized_ =
3829 ( int ( * )( kmp_user_lock_p ) )
3830 ( &__kmp_is_drdpa_lock_initialized );
3831
3832 __kmp_get_user_lock_location_ =
3833 ( const ident_t * ( * )( kmp_user_lock_p ) )
3834 ( &__kmp_get_drdpa_lock_location );
3835
3836 __kmp_set_user_lock_location_ =
3837 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3838 ( &__kmp_set_drdpa_lock_location );
3839
3840 __kmp_get_user_lock_flags_ =
3841 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3842 ( &__kmp_get_drdpa_lock_flags );
3843
3844 __kmp_set_user_lock_flags_ =
3845 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3846 ( &__kmp_set_drdpa_lock_flags );
3847 }
3848 break;
3849 }
3850}
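//
// For example, after __kmp_set_user_lock_vptrs( lk_ticket ) a call through
// __kmp_destroy_user_lock_ reaches __kmp_destroy_ticket_lock, and the
// location/flags accessors resolve to the __kmp_*_ticket_lock_* routines,
// whereas the lk_tas and lk_futex cases deliberately leave those accessors NULL.
//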
3851
3852
3853// ----------------------------------------------------------------------------
3854// User lock table & lock allocation
3855
3856kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3857kmp_user_lock_p __kmp_lock_pool = NULL;
3858
3859// Lock block-allocation support.
3860kmp_block_of_locks* __kmp_lock_blocks = NULL;
3861int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3862
3863static kmp_lock_index_t
3864__kmp_lock_table_insert( kmp_user_lock_p lck )
3865{
3866 // Assume that kmp_global_lock is held upon entry/exit.
3867 kmp_lock_index_t index;
3868 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3869 kmp_lock_index_t size;
3870 kmp_user_lock_p *table;
3871        // Reallocate lock table.
3872 if ( __kmp_user_lock_table.allocated == 0 ) {
3873 size = 1024;
3874 }
3875 else {
3876 size = __kmp_user_lock_table.allocated * 2;
3877 }
3878 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3879        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3880        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3881        // We cannot free the previous table now, since it may be in use by other
3882        // threads. So save the pointer to the previous table in the first element of the
3883        // new table. All the tables will be organized into a list, and can be freed when
3884        // the library is shutting down.
3885 __kmp_user_lock_table.table = table;
3886 __kmp_user_lock_table.allocated = size;
3887 }
3888 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3889 index = __kmp_user_lock_table.used;
3890 __kmp_user_lock_table.table[ index ] = lck;
3891 ++ __kmp_user_lock_table.used;
3892 return index;
3893}
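//
// The previous (smaller) tables are never freed here; each one is threaded
// through element [0] of its successor and released only in
// __kmp_cleanup_user_locks(), which walks that chain after the lock entries
// themselves have been freed.
//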
3894
3895static kmp_user_lock_p
3896__kmp_lock_block_allocate()
3897{
3898 // Assume that kmp_global_lock is held upon entry/exit.
3899 static int last_index = 0;
3900 if ( ( last_index >= __kmp_num_locks_in_block )
3901 || ( __kmp_lock_blocks == NULL ) ) {
3902 // Restart the index.
3903 last_index = 0;
3904 // Need to allocate a new block.
3905 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3906 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3907 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3908 // Set up the new block.
3909 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3910 new_block->next_block = __kmp_lock_blocks;
3911 new_block->locks = (void *)buffer;
3912 // Publish the new block.
3913 KMP_MB();
3914 __kmp_lock_blocks = new_block;
3915 }
3916 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3917 [ last_index * __kmp_user_lock_size ] ) );
3918 last_index++;
3919 return ret;
3920}
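//
// Layout of a lock block: a single allocation holds __kmp_num_locks_in_block
// lock objects followed by the kmp_block_of_locks header at the end of the
// buffer, so __kmp_cleanup_user_locks() only frees block_ptr->locks (the
// buffer) and the header is released along with it.
//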
3921
3922//
3923// Get memory for a lock. It may be freshly allocated memory or reused memory
3924// from the lock pool.
3925//
3926kmp_user_lock_p
3927__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3928 kmp_lock_flags_t flags )
3929{
3930 kmp_user_lock_p lck;
3931 kmp_lock_index_t index;
3932 KMP_DEBUG_ASSERT( user_lock );
3933
3934 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3935
3936 if ( __kmp_lock_pool == NULL ) {
3937 // Lock pool is empty. Allocate new memory.
3938 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3939 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3940 }
3941 else {
3942 lck = __kmp_lock_block_allocate();
3943 }
3944
3945 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3946 // and debugger has info on all allocated locks.
3947 index = __kmp_lock_table_insert( lck );
3948 }
3949 else {
3950 // Pick up lock from pool.
3951 lck = __kmp_lock_pool;
3952 index = __kmp_lock_pool->pool.index;
3953 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3954 }
3955
3956 //
3957 // We could potentially differentiate between nested and regular locks
3958 // here, and do the lock table lookup for regular locks only.
3959 //
3960 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3961 * ( (kmp_lock_index_t *) user_lock ) = index;
3962 }
3963 else {
3964 * ( (kmp_user_lock_p *) user_lock ) = lck;
3965 }
3966
3967 // mark the lock if it is critical section lock.
3968 __kmp_set_user_lock_flags( lck, flags );
3969
3970    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3971
3972 return lck;
3973}
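//
// As with the dynamic-lock path, the value written into *user_lock is either a
// table index (when OMP_LOCK_T_SIZE < sizeof(void *)) or the lock pointer
// itself; __kmp_lookup_user_lock() below reverses whichever encoding was used.
//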
3974
3975// Put lock's memory to pool for reusing.
3976void
3977__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3978{
3979    KMP_DEBUG_ASSERT( user_lock != NULL );
3980 KMP_DEBUG_ASSERT( lck != NULL );
3981
3982 __kmp_acquire_lock( & __kmp_global_lock, gtid );
3983
3984 lck->pool.next = __kmp_lock_pool;
3985 __kmp_lock_pool = lck;
3986 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3987 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3988 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3989 lck->pool.index = index;
3990 }
3991
3992 __kmp_release_lock( & __kmp_global_lock, gtid );
3993}
3994
3995kmp_user_lock_p
3996__kmp_lookup_user_lock( void **user_lock, char const *func )
3997{
3998 kmp_user_lock_p lck = NULL;
3999
4000 if ( __kmp_env_consistency_check ) {
4001 if ( user_lock == NULL ) {
4002 KMP_FATAL( LockIsUninitialized, func );
4003 }
4004 }
4005
4006 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4007 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4008 if ( __kmp_env_consistency_check ) {
4009 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4010 KMP_FATAL( LockIsUninitialized, func );
4011 }
4012 }
4013 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4014 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4015 lck = __kmp_user_lock_table.table[index];
4016 }
4017 else {
4018 lck = *( (kmp_user_lock_p *)user_lock );
4019 }
4020
4021 if ( __kmp_env_consistency_check ) {
4022 if ( lck == NULL ) {
4023 KMP_FATAL( LockIsUninitialized, func );
4024 }
4025 }
4026
4027 return lck;
4028}
4029
4030void
4031__kmp_cleanup_user_locks( void )
4032{
4033 //
4034    // Reset the lock pool. Do not worry about locks in the pool -- we will free
4035    // them when iterating through the lock table (it includes all the locks,
4036    // dead or alive).
4037 //
4038 __kmp_lock_pool = NULL;
4039
4040#define IS_CRITICAL(lck) \
4041 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4042 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4043
4044 //
4045 // Loop through lock table, free all locks.
4046 //
4047 // Do not free item [0], it is reserved for lock tables list.
4048 //
4049 // FIXME - we are iterating through a list of (pointers to) objects of
4050 // type union kmp_user_lock, but we have no way of knowing whether the
4051 // base type is currently "pool" or whatever the global user lock type
4052 // is.
4053 //
4054 // We are relying on the fact that for all of the user lock types
4055 // (except "tas"), the first field in the lock struct is the "initialized"
4056 // field, which is set to the address of the lock object itself when
4057 // the lock is initialized. When the union is of type "pool", the
4058 // first field is a pointer to the next object in the free list, which
4059 // will not be the same address as the object itself.
4060 //
4061 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4062 // will fail for "pool" objects on the free list. This must happen as
4063 // the "location" field of real user locks overlaps the "index" field
4064 // of "pool" objects.
4065 //
4066 // It would be better to run through the free list, and remove all "pool"
4067 // objects from the lock table before executing this loop. However,
4068 // "pool" objects do not always have their index field set (only on
4069 // lin_32e), and I don't want to search the lock table for the address
4070 // of every "pool" object on the free list.
4071 //
4072 while ( __kmp_user_lock_table.used > 1 ) {
4073 const ident *loc;
4074
4075 //
4076 // reduce __kmp_user_lock_table.used before freeing the lock,
4077 // so that state of locks is consistent
4078 //
4079 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4080 --__kmp_user_lock_table.used ];
4081
4082 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4083 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4084 //
4085 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4086 // initialized AND it is NOT a critical section (user is not
4087 // responsible for destroying criticals) AND we know source
4088 // location to report.
4089 //
4090 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4091 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4092 ( loc->psource != NULL ) ) {
4093 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4094                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4095                __kmp_str_loc_free( &str_loc);
4096 }
4097
4098#ifdef KMP_DEBUG
4099 if ( IS_CRITICAL( lck ) ) {
4100 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4101 }
4102 else {
4103 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4104 }
4105#endif // KMP_DEBUG
4106
4107 //
4108 // Cleanup internal lock dynamic resources
4109 // (for drdpa locks particularly).
4110 //
4111 __kmp_destroy_user_lock( lck );
4112 }
4113
4114 //
4115 // Free the lock if block allocation of locks is not used.
4116 //
4117 if ( __kmp_lock_blocks == NULL ) {
4118 __kmp_free( lck );
4119 }
4120 }
4121
4122#undef IS_CRITICAL
4123
4124 //
4125 // delete lock table(s).
4126 //
4127 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4128 __kmp_user_lock_table.table = NULL;
4129 __kmp_user_lock_table.allocated = 0;
4130
4131 while ( table_ptr != NULL ) {
4132 //
4133 // In the first element we saved the pointer to the previous
4134 // (smaller) lock table.
4135 //
4136 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4137 __kmp_free( table_ptr );
4138 table_ptr = next;
4139 }
4140
4141 //
4142 // Free buffers allocated for blocks of locks.
4143 //
4144 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4145 __kmp_lock_blocks = NULL;
4146
4147 while ( block_ptr != NULL ) {
4148 kmp_block_of_locks_t *next = block_ptr->next_block;
4149 __kmp_free( block_ptr->locks );
4150 //
4151 // *block_ptr itself was allocated at the end of the locks vector.
4152 //
4153 block_ptr = next;
4154 }
4155
4156 TCW_4(__kmp_init_user_locks, FALSE);
4157}
4158
4159#endif // KMP_USE_DYNAMIC_LOCK