/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

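//
// Note: the wraparound loop below exercises unsigned 32-bit modular
// arithmetic -- (x - y) must stay 2 even as x and y wrap past zero.  The
// ticket (bakery) locks later in this file rely on exactly that property
// when they compute distances such as next_ticket - now_serving, so it is
// validated once here.
//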
void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

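//
// The whole state of a TAS lock lives in the 32-bit lk.poll word: it holds
// DYNA_LOCK_FREE(tas) when the lock is free and an encoding of gtid+1 when
// held (see the DYNA_LOCK_* macros for the exact tagging).  The owner query
// below strips the tag and subtracts 1, so -1 means "unowned".
//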
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

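//
// Acquire path for a TAS lock: try a single compare-and-store first; on
// failure, spin and retry, yielding unconditionally when there are more
// threads than available processors (oversubscription) and using the bounded
// KMP_YIELD_SPIN otherwise.  As the FIXME inside the loop notes, the retry
// path currently has no exponential backoff.
//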
__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}

void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//

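//
// For all of the nested lock flavors in this file, lk.depth_locked counts
// how many times the owning thread has acquired the lock (a release only
// frees the lock once the count drops to zero), while a value of -1 marks a
// lock that was initialized as a simple, non-nestable lock.
//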
void
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

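//
// Encoding of lk.poll for a futex lock: DYNA_LOCK_FREE(futex) when the lock
// is free, otherwise (gtid+1) << 1 for the owner (see the DYNA_LOCK_* macros
// for the exact tagging).  The low bit is set once at least one waiter has
// gone to sleep in futex_wait, which tells the releasing thread that it must
// issue a futex_wake.
//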
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static void
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
      DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {

        kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
          lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= DYNA_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
              lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
}

void
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
      lck, gtid, poll_val ) );

    if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
          lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

void
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

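//
// A ticket lock hands out monotonically increasing tickets: acquire
// atomically increments lk.next_ticket to claim a ticket and then waits
// until lk.now_serving reaches that value; release simply advances
// lk.now_serving.  Threads are therefore served in strict FIFO (bakery)
// order.
//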
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

static kmp_uint32
__kmp_bakery_check(kmp_uint value, kmp_uint checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}

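//
// __kmp_bakery_check is the spin predicate handed to KMP_WAIT_YIELD by the
// acquire path below: it returns TRUE once now_serving has reached the
// caller's ticket, and otherwise burns a short delay roughly proportional to
// checker - value (the caller's distance from the head of the line) before
// the wait loop re-checks.
//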
__forceinline static void
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
}

void
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 distance;

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

void
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *               UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                            h, h  means lock is held or about to transition, 1 element on queue
 *                            h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  = h',t     h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */

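//
// Implementation notes: lk.head_id and lk.tail_id store gtid+1 (or the 0 / -1
// values shown in the diagram above).  Waiting threads are chained through
// their th.th_next_waiting fields, and each one spins on its own
// th.th_spin_here flag, which the releasing thread clears after dequeuing it.
//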
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

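//
// The TRACE_LOCK* macros above record events into the circular `traces`
// buffer (indexed by tc modulo TRACE_BUF_ELE); __kmp_dump_queuing_lock below
// replays that buffer from the oldest entry forward and then prints the wait
// queue reachable from lk.head_id.  All of this is compiled only under
// DEBUG_QUEUING_LOCKS.
//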
static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
      "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
      gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
      head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
    ompt_state_t prev_state = ompt_state_undefined;
#endif

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if ((ompt_status & ompt_status_track) &&
                        prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if ((ompt_status & ompt_status_track) &&
            prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif

        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
              lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
}

void
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}

static void
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}

static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

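//
// Release protocol for the queuing lock: if head == -1 (no waiters) the owner
// CASes (head,tail) back to (0,0); if exactly one thread is waiting (h,h) it
// CASes to (-1,0) and hands the lock over; otherwise it waits for the head
// thread's th_next_waiting link to be published, advances head to that
// successor, and wakes the dequeued thread by clearing its th_spin_here flag.
//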
int
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                  lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif

#if OMPT_SUPPORT
                /* nothing to do - no other thread is trying to shift blame */
#endif

                return KMP_LOCK_RELEASED;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return KMP_LOCK_RELEASED;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_queuing_lock( lck, gtid );
}

void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

void
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_queuing_lock( lck, gtid );
}

int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}

int
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );
1764
1765 KMP_MB();
1766 if ( --(lck->lk.depth_locked) == 0 ) {
1767 KMP_MB();
1768 lck->lk.owner_id = 0;
1769 __kmp_release_queuing_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001770 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001771 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001772 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001773}
1774
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001775static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00001776__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1777{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001778 char const * const func = "omp_unset_nest_lock";
1779 KMP_MB(); /* in case another processor initialized lock */
1780 if ( lck->lk.initialized != lck ) {
1781 KMP_FATAL( LockIsUninitialized, func );
1782 }
1783 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1784 KMP_FATAL( LockSimpleUsedAsNestable, func );
1785 }
1786 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1787 KMP_FATAL( LockUnsettingFree, func );
1788 }
1789 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1790 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001791 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00001792 return __kmp_release_nested_queuing_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001793}
1794
1795void
1796__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1797{
1798 __kmp_init_queuing_lock( lck );
1799 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1800}
1801
1802static void
1803__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1804{
1805 __kmp_init_nested_queuing_lock( lck );
1806}
1807
1808void
1809__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1810{
1811 __kmp_destroy_queuing_lock( lck );
1812 lck->lk.depth_locked = 0;
1813}
1814
1815static void
1816__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1817{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001818 char const * const func = "omp_destroy_nest_lock";
1819 if ( lck->lk.initialized != lck ) {
1820 KMP_FATAL( LockIsUninitialized, func );
1821 }
1822 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1823 KMP_FATAL( LockSimpleUsedAsNestable, func );
1824 }
1825 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1826 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001827 }
1828 __kmp_destroy_nested_queuing_lock( lck );
1829}
1830
1831
1832//
1833// access functions to fields which don't exist for all lock kinds.
1834//
1835
1836static int
1837__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1838{
1839 return lck == lck->lk.initialized;
1840}
1841
1842static const ident_t *
1843__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1844{
1845 return lck->lk.location;
1846}
1847
1848static void
1849__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1850{
1851 lck->lk.location = loc;
1852}
1853
1854static kmp_lock_flags_t
1855__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1856{
1857 return lck->lk.flags;
1858}
1859
1860static void
1861__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1862{
1863 lck->lk.flags = flags;
1864}
1865
1866#if KMP_USE_ADAPTIVE_LOCKS
1867
1868/*
1869 RTM Adaptive locks
1870*/
1871
1872// TODO: Use the header for intrinsics below with the compiler 13.0
1873//#include <immintrin.h>
1874
1875// Values from the status register after failed speculation.
1876#define _XBEGIN_STARTED (~0u)
1877#define _XABORT_EXPLICIT (1 << 0)
1878#define _XABORT_RETRY (1 << 1)
1879#define _XABORT_CONFLICT (1 << 2)
1880#define _XABORT_CAPACITY (1 << 3)
1881#define _XABORT_DEBUG (1 << 4)
1882#define _XABORT_NESTED (1 << 5)
1883#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1884
1885// Aborts for which it's worth trying again immediately
1886#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1887
1888#define STRINGIZE_INTERNAL(arg) #arg
1889#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1890
1891// Access to RTM instructions
1892
1893/*
1894 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1895 This is the same definition as the compiler intrinsic that will be supported at some point.
1896*/
1897static __inline int _xbegin()
1898{
1899 int res = -1;
1900
1901#if KMP_OS_WINDOWS
1902#if KMP_ARCH_X86_64
1903 _asm {
1904 _emit 0xC7
1905 _emit 0xF8
1906 _emit 2
1907 _emit 0
1908 _emit 0
1909 _emit 0
1910 jmp L2
1911 mov res, eax
1912 L2:
1913 }
1914#else /* IA32 */
1915 _asm {
1916 _emit 0xC7
1917 _emit 0xF8
1918 _emit 2
1919 _emit 0
1920 _emit 0
1921 _emit 0
1922 jmp L2
1923 mov res, eax
1924 L2:
1925 }
1926#endif // KMP_ARCH_X86_64
1927#else
1928 /* Note that %eax must be noted as killed (clobbered), because
1929 * the XSR is returned in %eax(%rax) on abort. Other register
1930 * values are restored, so don't need to be killed.
1931 *
1932 * We must also mark 'res' as an input and an output, since otherwise
1933 * 'res=-1' may be dropped as being dead, whereas we do need the
1934 * assignment on the successful (i.e., non-abort) path.
1935 */
1936 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1937 " .long 1f-1b-6\n"
1938 " jmp 2f\n"
1939 "1: movl %%eax,%0\n"
1940 "2:"
1941 :"+r"(res)::"memory","%eax");
1942#endif // KMP_OS_WINDOWS
1943 return res;
1944}
1945
1946/*
1947 Transaction end
1948*/
1949static __inline void _xend()
1950{
1951#if KMP_OS_WINDOWS
1952 __asm {
1953 _emit 0x0f
1954 _emit 0x01
1955 _emit 0xd5
1956 }
1957#else
1958 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1959#endif
1960}
1961
1962/*
1963 This is a macro, the argument must be a single byte constant which
 1964    This is a macro; the argument must be a single byte constant which
1965 byte into the assembly code.
1966*/
1967#if KMP_OS_WINDOWS
1968#define _xabort(ARG) \
1969 _asm _emit 0xc6 \
1970 _asm _emit 0xf8 \
1971 _asm _emit ARG
1972#else
1973#define _xabort(ARG) \
1974 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1975#endif
1976
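/*
    Illustrative usage sketch for the intrinsics above (hypothetical code, not
    part of the runtime; the actual retry and fallback policy lives in
    __kmp_test_adaptive_lock_only below):

        kmp_uint32 status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            // Speculative region: memory accesses are tracked by the hardware.
            _xend();                          // commit the transaction
        }
        else if ( status & SOFT_ABORT_MASK ) {
            // Transient abort (retry/conflict/explicit): reasonable to retry.
        }
        else {
            // Hard abort (e.g. capacity): fall back to a real lock.
        }
*/
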
1977//
1978// Statistics are collected for testing purposes
1979//
1980#if KMP_DEBUG_ADAPTIVE_LOCKS
1981
1982// We accumulate speculative lock statistics when the lock is destroyed.
1983// We keep locks that haven't been destroyed in the liveLocks list
1984// so that we can grab their statistics too.
1985static kmp_adaptive_lock_statistics_t destroyedStats;
1986
1987// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001988static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001989
1990// A lock so we can safely update the list of locks.
1991static kmp_bootstrap_lock_t chain_lock;
1992
1993// Initialize the list of stats.
1994void
1995__kmp_init_speculative_stats()
1996{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001997 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001998
1999 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2000 lck->stats.next = lck;
2001 lck->stats.prev = lck;
2002
2003 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2004 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2005
2006 __kmp_init_bootstrap_lock( &chain_lock );
2007
2008}
2009
2010// Insert the lock into the circular list
2011static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002012__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002013{
2014 __kmp_acquire_bootstrap_lock( &chain_lock );
2015
2016 lck->stats.next = liveLocks.stats.next;
2017 lck->stats.prev = &liveLocks;
2018
2019 liveLocks.stats.next = lck;
2020 lck->stats.next->stats.prev = lck;
2021
2022 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2023 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2024
2025 __kmp_release_bootstrap_lock( &chain_lock );
2026}
2027
2028static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002029__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002030{
2031 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2032 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2033
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002034 kmp_adaptive_lock_info_t * n = lck->stats.next;
2035 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002036
2037 n->stats.prev = p;
2038 p->stats.next = n;
2039}
2040
2041static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002042__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002043{
2044 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2045 __kmp_remember_lock( lck );
2046}
2047
2048static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002049__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002050{
2051 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2052
2053 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2054 t->successfulSpeculations += s->successfulSpeculations;
2055 t->hardFailedSpeculations += s->hardFailedSpeculations;
2056 t->softFailedSpeculations += s->softFailedSpeculations;
2057 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2058 t->lemmingYields += s->lemmingYields;
2059}
2060
2061static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002062__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002063{
2064 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2065
2066 __kmp_acquire_bootstrap_lock( &chain_lock );
2067
2068 __kmp_add_stats( &destroyedStats, lck );
2069 __kmp_forget_lock( lck );
2070
2071 __kmp_release_bootstrap_lock( &chain_lock );
2072}
2073
2074static float
2075percent (kmp_uint32 count, kmp_uint32 total)
2076{
2077 return (total == 0) ? 0.0: (100.0 * count)/total;
2078}
2079
2080static
2081FILE * __kmp_open_stats_file()
2082{
2083 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2084 return stdout;
2085
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002086 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002087 char buffer[buffLen];
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00002088 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002089 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002090 FILE * result = fopen(&buffer[0], "w");
2091
2092 // Maybe we should issue a warning here...
2093 return result ? result : stdout;
2094}
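
// Note (inferred from the code above, not a documented interface): the file
// name is passed to KMP_SNPRINTF as a format string with the process id as its
// only argument, so a value such as "speculative-%d.log" (hypothetical example)
// produces one file per process, a plain name is used verbatim, "-" selects
// stdout, and stdout is also the fallback if the file cannot be opened.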
2095
2096void
2097__kmp_print_speculative_stats()
2098{
2099 if (__kmp_user_lock_kind != lk_adaptive)
2100 return;
2101
2102 FILE * statsFile = __kmp_open_stats_file();
2103
2104 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002105 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002106
2107 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2108 __kmp_add_stats( &total, lck );
2109 }
2110 kmp_adaptive_lock_statistics_t *t = &total;
2111 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2112 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2113 t->softFailedSpeculations;
2114
2115 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2116 fprintf ( statsFile, " Lock parameters: \n"
2117 " max_soft_retries : %10d\n"
2118 " max_badness : %10d\n",
2119 __kmp_adaptive_backoff_params.max_soft_retries,
2120 __kmp_adaptive_backoff_params.max_badness);
2121 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2122 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2123 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2124 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2125 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2126 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2127 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2128
2129 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2130 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2131 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2132 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2133 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2134 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2135 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2136
2137 if (statsFile != stdout)
2138 fclose( statsFile );
2139}
2140
2141# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2142#else
2143# define KMP_INC_STAT(lck,stat)
2144
2145#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2146
2147static inline bool
2148__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2149{
2150 // It is enough to check that the head_id is zero.
2151    // We don't need to check the tail as well.
2152 bool res = lck->lk.head_id == 0;
2153
2154 // We need a fence here, since we must ensure that no memory operations
2155 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002156#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002157 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002158#else
2159 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002160#endif
2161
2162 return res;
2163}
2164
2165// Functions for manipulating the badness
2166static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002167__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002168{
2169 // Reset the badness to zero so we eagerly try to speculate again
2170 lck->lk.adaptive.badness = 0;
2171 KMP_INC_STAT(lck,successfulSpeculations);
2172}
2173
2174// Create a bit mask with one more set bit.
2175static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002176__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002177{
2178 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2179 if ( newBadness > lck->lk.adaptive.max_badness) {
2180 return;
2181 } else {
2182 lck->lk.adaptive.badness = newBadness;
2183 }
2184}
2185
2186// Check whether speculation should be attempted.
2187static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002188__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002189{
2190 kmp_uint32 badness = lck->lk.adaptive.badness;
2191 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2192 int res = (attempts & badness) == 0;
2193 return res;
2194}
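
// Worked example (illustrative comment only; the values follow from the three
// helpers above): badness steps through 0 -> 1 -> 3 -> 7 -> ... until it
// saturates at max_badness, and acquire_attempts is incremented on each
// non-speculative acquisition path, so with badness == 7 speculation is
// re-attempted only on every 8th acquire attempt ((attempts & 7) == 0).  A
// successful speculation resets badness to 0, so speculation is eager again.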
2195
2196// Attempt to acquire only the speculative lock.
2197// Does not back off to the non-speculative lock.
2198//
2199static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002200__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002201{
2202 int retries = lck->lk.adaptive.max_soft_retries;
2203
2204    // We don't explicitly count the start of speculation; rather, we record
2205 // the results (success, hard fail, soft fail). The sum of all of those
2206 // is the total number of times we started speculation since all
2207    // speculations must end in one of those ways.
2208 do
2209 {
2210 kmp_uint32 status = _xbegin();
2211 // Switch this in to disable actual speculation but exercise
2212 // at least some of the rest of the code. Useful for debugging...
2213 // kmp_uint32 status = _XABORT_NESTED;
2214
2215 if (status == _XBEGIN_STARTED )
2216 { /* We have successfully started speculation
2217 * Check that no-one acquired the lock for real between when we last looked
2218 * and now. This also gets the lock cache line into our read-set,
2219 * which we need so that we'll abort if anyone later claims it for real.
2220 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002221 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002222 {
2223 // Lock is now visibly acquired, so someone beat us to it.
2224 // Abort the transaction so we'll restart from _xbegin with the
2225 // failure status.
2226 _xabort(0x01)
2227 KMP_ASSERT2( 0, "should not get here" );
2228 }
2229 return 1; // Lock has been acquired (speculatively)
2230 } else {
2231 // We have aborted, update the statistics
2232 if ( status & SOFT_ABORT_MASK)
2233 {
2234 KMP_INC_STAT(lck,softFailedSpeculations);
2235 // and loop round to retry.
2236 }
2237 else
2238 {
2239 KMP_INC_STAT(lck,hardFailedSpeculations);
2240 // Give up if we had a hard failure.
2241 break;
2242 }
2243 }
2244 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2245
2246 // Either we had a hard failure or we didn't succeed softly after
2247 // the full set of attempts, so back off the badness.
2248 __kmp_step_badness( lck );
2249 return 0;
2250}
2251
2252// Attempt to acquire the speculative lock, or back off to the non-speculative one
2253// if the speculative lock cannot be acquired.
2254// We can succeed speculatively, non-speculatively, or fail.
2255static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002256__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002257{
2258 // First try to acquire the lock speculatively
2259 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2260 return 1;
2261
2262 // Speculative acquisition failed, so try to acquire it non-speculatively.
2263 // Count the non-speculative acquire attempt
2264 lck->lk.adaptive.acquire_attempts++;
2265
2266 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002267 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002268 {
2269 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2270 return 1; // Lock is acquired (non-speculatively)
2271 }
2272 else
2273 {
2274 return 0; // Failed to acquire the lock, it's already visibly locked.
2275 }
2276}
2277
2278static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002279__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002281 char const * const func = "omp_test_lock";
2282 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2283 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002284 }
2285
2286 int retval = __kmp_test_adaptive_lock( lck, gtid );
2287
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002288 if ( retval ) {
2289 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002290 }
2291 return retval;
2292}
2293
2294// Block until we can acquire a speculative, adaptive lock.
2295// We check whether we should be trying to speculate.
2296// If we should be, we check the real lock to see if it is free,
2297// and, if not, pause without attempting to acquire it until it is.
2298// Then we try the speculative acquire.
2299// This means that although we suffer from lemmings a little (
2300// because we can't acquire the lock speculatively until
2301// the queue of threads waiting has cleared), we don't get into a
2302// state where we can never acquire the lock speculatively (because we
2303// force the queue to clear by preventing new arrivals from entering the
2304// queue).
2305// This does mean that when we're trying to break lemmings, the lock
2306// is no longer fair. However, OpenMP makes no guarantee that its
2307// locks are fair, so this isn't a real problem.
2308static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002309__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002310{
2311 if ( __kmp_should_speculate( lck, gtid ) )
2312 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002313 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002314 {
2315 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2316 return;
2317 // We tried speculation and failed, so give up.
2318 }
2319 else
2320 {
2321 // We can't try speculation until the lock is free, so we
2322            // pause here (without suspending on the queuing lock)
2323 // to allow it to drain, then try again.
2324 // All other threads will also see the same result for
2325 // shouldSpeculate, so will be doing the same if they
2326 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002327 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002328 {
2329 KMP_INC_STAT(lck,lemmingYields);
2330 __kmp_yield (TRUE);
2331 }
2332
2333 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2334 return;
2335 }
2336 }
2337
2338 // Speculative acquisition failed, so acquire it non-speculatively.
2339 // Count the non-speculative acquire attempt
2340 lck->lk.adaptive.acquire_attempts++;
2341
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002342 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002343 // We have acquired the base lock, so count that.
2344 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2345}
2346
2347static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002348__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002349{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002350 char const * const func = "omp_set_lock";
2351 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2352 KMP_FATAL( LockIsUninitialized, func );
2353 }
2354 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2355 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002356 }
2357
2358 __kmp_acquire_adaptive_lock( lck, gtid );
2359
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002360 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002361}
2362
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002363static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002364__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002365{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002366 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002367 { // If the lock doesn't look claimed we must be speculating.
2368 // (Or the user's code is buggy and they're releasing without locking;
2369 // if we had XTEST we'd be able to check that case...)
2370 _xend(); // Exit speculation
2371 __kmp_update_badness_after_success( lck );
2372 }
2373 else
2374 { // Since the lock *is* visibly locked we're not speculating,
2375 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002378 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002379}
2380
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002381static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002382__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002383{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002384 char const * const func = "omp_unset_lock";
2385 KMP_MB(); /* in case another processor initialized lock */
2386 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2387 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002388 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002389 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2390 KMP_FATAL( LockUnsettingFree, func );
2391 }
2392 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2393 KMP_FATAL( LockUnsettingSetByAnother, func );
2394 }
2395 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002396 __kmp_release_adaptive_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002397 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002398}
2399
2400static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002401__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002402{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002403 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002404 lck->lk.adaptive.badness = 0;
2405 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2406 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2407 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2408#if KMP_DEBUG_ADAPTIVE_LOCKS
2409 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2410#endif
2411 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2412}
2413
2414static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002415__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002416{
2417 __kmp_init_adaptive_lock( lck );
2418}
2419
2420static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002421__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002422{
2423#if KMP_DEBUG_ADAPTIVE_LOCKS
2424 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2425#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002426 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002427 // Nothing needed for the speculative part.
2428}
2429
2430static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002431__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002432{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002433 char const * const func = "omp_destroy_lock";
2434 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2435 KMP_FATAL( LockIsUninitialized, func );
2436 }
2437 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2438 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002439 }
2440 __kmp_destroy_adaptive_lock( lck );
2441}
2442
2443
2444#endif // KMP_USE_ADAPTIVE_LOCKS
2445
2446
2447/* ------------------------------------------------------------------------ */
2448/* DRDPA ticket locks */
2449/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2450
2451static kmp_int32
2452__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2453{
2454 return TCR_4( lck->lk.owner_id ) - 1;
2455}
2456
2457static inline bool
2458__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2459{
2460 return lck->lk.depth_locked != -1;
2461}
2462
2463__forceinline static void
2464__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2465{
2466 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2467 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2468 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2469 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2470 TCR_PTR(lck->lk.polls); // volatile load
2471
2472#ifdef USE_LOCK_PROFILE
2473 if (TCR_8(polls[ticket & mask].poll) != ticket)
2474 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2475 /* else __kmp_printf( "." );*/
2476#endif /* USE_LOCK_PROFILE */
2477
2478 //
2479 // Now spin-wait, but reload the polls pointer and mask, in case the
2480 // polling area has been reconfigured. Unless it is reconfigured, the
2481 // reloads stay in L1 cache and are cheap.
2482 //
2483 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2484 //
2485 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2486 // and poll to be re-read every spin iteration.
2487 //
2488 kmp_uint32 spins;
2489
2490 KMP_FSYNC_PREPARE(lck);
2491 KMP_INIT_YIELD(spins);
2492 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002493 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002494 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002495 // CPU Pause is in the macros for yield.
2496 //
2497 KMP_YIELD(TCR_4(__kmp_nth)
2498 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2499 KMP_YIELD_SPIN(spins);
2500
2501 // Re-read the mask and the poll pointer from the lock structure.
2502 //
2503 // Make certain that "mask" is read before "polls" !!!
2504 //
2505        // If another thread reconfigures the polling area and updates
2506 // their values, and we get the new value of mask and the old polls
2507 // pointer, we could access memory beyond the end of the old polling
2508 // area.
2509 //
2510 mask = TCR_8(lck->lk.mask); // volatile load
2511 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2512 TCR_PTR(lck->lk.polls); // volatile load
2513 }
2514
2515 //
2516 // Critical section starts here
2517 //
2518 KMP_FSYNC_ACQUIRED(lck);
2519 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2520 ticket, lck));
2521 lck->lk.now_serving = ticket; // non-volatile store
2522
2523 //
2524 // Deallocate a garbage polling area if we know that we are the last
2525 // thread that could possibly access it.
2526 //
2527 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2528 // ticket.
2529 //
2530 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2531 __kmp_free((void *)lck->lk.old_polls);
2532 lck->lk.old_polls = NULL;
2533 lck->lk.cleanup_ticket = 0;
2534 }
2535
2536 //
2537 // Check to see if we should reconfigure the polling area.
2538 // If there is still a garbage polling area to be deallocated from a
2539 // previous reconfiguration, let a later thread reconfigure it.
2540 //
2541 if (lck->lk.old_polls == NULL) {
2542 bool reconfigure = false;
2543 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2544 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2545
2546 if (TCR_4(__kmp_nth)
2547 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2548 //
2549 // We are in oversubscription mode. Contract the polling area
2550 // down to a single location, if that hasn't been done already.
2551 //
2552 if (num_polls > 1) {
2553 reconfigure = true;
2554 num_polls = TCR_4(lck->lk.num_polls);
2555 mask = 0;
2556 num_polls = 1;
2557 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2558 __kmp_allocate(num_polls * sizeof(*polls));
2559 polls[0].poll = ticket;
2560 }
2561 }
2562 else {
2563 //
2564 // We are in under/fully subscribed mode. Check the number of
2565 // threads waiting on the lock. The size of the polling area
2566 // should be at least the number of threads waiting.
2567 //
2568 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2569 if (num_waiting > num_polls) {
2570 kmp_uint32 old_num_polls = num_polls;
2571 reconfigure = true;
2572 do {
2573 mask = (mask << 1) | 1;
2574 num_polls *= 2;
2575 } while (num_polls <= num_waiting);
2576
2577 //
2578 // Allocate the new polling area, and copy the relevant portion
2579 // of the old polling area to the new area. __kmp_allocate()
2580 // zeroes the memory it allocates, and most of the old area is
2581 // just zero padding, so we only copy the release counters.
2582 //
2583 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2584 __kmp_allocate(num_polls * sizeof(*polls));
2585 kmp_uint32 i;
2586 for (i = 0; i < old_num_polls; i++) {
2587 polls[i].poll = old_polls[i].poll;
2588 }
2589 }
2590 }
2591
2592 if (reconfigure) {
2593 //
2594 // Now write the updated fields back to the lock structure.
2595 //
2596 // Make certain that "polls" is written before "mask" !!!
2597 //
2598 // If another thread picks up the new value of mask and the old
2599        // polls pointer, it could access memory beyond the end of the
2600 // old polling area.
2601 //
2602 // On x86, we need memory fences.
2603 //
2604 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2605 ticket, lck, num_polls));
2606
2607 lck->lk.old_polls = old_polls; // non-volatile store
2608 lck->lk.polls = polls; // volatile store
2609
2610 KMP_MB();
2611
2612 lck->lk.num_polls = num_polls; // non-volatile store
2613 lck->lk.mask = mask; // volatile store
2614
2615 KMP_MB();
2616
2617 //
2618 // Only after the new polling area and mask have been flushed
2619 // to main memory can we update the cleanup ticket field.
2620 //
2621 // volatile load / non-volatile store
2622 //
2623 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2624 }
2625 }
2626}
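
// Illustrative summary of the scheme above (descriptive comment only, no new
// logic): each acquiring thread takes a ticket and spins on its own slot,
// polls[ticket & mask].  With num_polls == 4 (mask == 3), tickets 5, 6, 7, 8
// spin on slots 1, 2, 3, 0, so the release store of ticket N wakes only the
// thread holding ticket N.  When the area grows, num_polls doubles
// (1 -> 2 -> 4 -> ...) and mask follows as (mask << 1) | 1 (0 -> 1 -> 3 -> ...);
// under oversubscription it is contracted back to a single slot.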
2627
2628void
2629__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2630{
2631 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2632}
2633
2634static void
2635__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2636{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002637 char const * const func = "omp_set_lock";
2638 if ( lck->lk.initialized != lck ) {
2639 KMP_FATAL( LockIsUninitialized, func );
2640 }
2641 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2642 KMP_FATAL( LockNestableUsedAsSimple, func );
2643 }
2644 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2645 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002646 }
2647
2648 __kmp_acquire_drdpa_lock( lck, gtid );
2649
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002650 lck->lk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002651}
2652
2653int
2654__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2655{
2656 //
2657 // First get a ticket, then read the polls pointer and the mask.
2658 // The polls pointer must be read before the mask!!! (See above)
2659 //
2660 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2661 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2662 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2663 TCR_PTR(lck->lk.polls); // volatile load
2664 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2665 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2666 kmp_uint64 next_ticket = ticket + 1;
2667 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2668 ticket, next_ticket)) {
2669 KMP_FSYNC_ACQUIRED(lck);
2670 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2671 ticket, lck));
2672 lck->lk.now_serving = ticket; // non-volatile store
2673
2674 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002675 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002676 // we would want to reconfigure the polling area. We might
2677 // have the cleanup ticket value (which says that it is now
2678 // safe to deallocate old_polls), but we'll let a later thread
2679 // which calls __kmp_acquire_lock do that - this routine
2680            // isn't supposed to block, and we would risk blocking if we
2681 // called __kmp_free() to do the deallocation.
2682 //
2683 return TRUE;
2684 }
2685 }
2686 return FALSE;
2687}
2688
2689static int
2690__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2691{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002692 char const * const func = "omp_test_lock";
2693 if ( lck->lk.initialized != lck ) {
2694 KMP_FATAL( LockIsUninitialized, func );
2695 }
2696 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2697 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002698 }
2699
2700 int retval = __kmp_test_drdpa_lock( lck, gtid );
2701
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002702 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002703 lck->lk.owner_id = gtid + 1;
2704 }
2705 return retval;
2706}
2707
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002708int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002709__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2710{
2711 //
2712 // Read the ticket value from the lock data struct, then the polls
2713 // pointer and the mask. The polls pointer must be read before the
2714 // mask!!! (See above)
2715 //
2716 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2717 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2718 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2719 TCR_PTR(lck->lk.polls); // volatile load
2720 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2721 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2722 ticket - 1, lck));
2723 KMP_FSYNC_RELEASING(lck);
2724 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002725 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002726}
2727
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002728static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002729__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2730{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002731 char const * const func = "omp_unset_lock";
2732 KMP_MB(); /* in case another processor initialized lock */
2733 if ( lck->lk.initialized != lck ) {
2734 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002735 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002736 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2737 KMP_FATAL( LockNestableUsedAsSimple, func );
2738 }
2739 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2740 KMP_FATAL( LockUnsettingFree, func );
2741 }
2742 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2743 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2744 KMP_FATAL( LockUnsettingSetByAnother, func );
2745 }
2746 lck->lk.owner_id = 0;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002747 return __kmp_release_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002748}
2749
2750void
2751__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2752{
2753 lck->lk.location = NULL;
2754 lck->lk.mask = 0;
2755 lck->lk.num_polls = 1;
2756 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2757 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2758 lck->lk.cleanup_ticket = 0;
2759 lck->lk.old_polls = NULL;
2760 lck->lk.next_ticket = 0;
2761 lck->lk.now_serving = 0;
2762 lck->lk.owner_id = 0; // no thread owns the lock.
2763 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2764 lck->lk.initialized = lck;
2765
2766 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2767}
2768
2769static void
2770__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2771{
2772 __kmp_init_drdpa_lock( lck );
2773}
2774
2775void
2776__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2777{
2778 lck->lk.initialized = NULL;
2779 lck->lk.location = NULL;
2780 if (lck->lk.polls != NULL) {
2781 __kmp_free((void *)lck->lk.polls);
2782 lck->lk.polls = NULL;
2783 }
2784 if (lck->lk.old_polls != NULL) {
2785 __kmp_free((void *)lck->lk.old_polls);
2786 lck->lk.old_polls = NULL;
2787 }
2788 lck->lk.mask = 0;
2789 lck->lk.num_polls = 0;
2790 lck->lk.cleanup_ticket = 0;
2791 lck->lk.next_ticket = 0;
2792 lck->lk.now_serving = 0;
2793 lck->lk.owner_id = 0;
2794 lck->lk.depth_locked = -1;
2795}
2796
2797static void
2798__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2799{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002800 char const * const func = "omp_destroy_lock";
2801 if ( lck->lk.initialized != lck ) {
2802 KMP_FATAL( LockIsUninitialized, func );
2803 }
2804 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2805 KMP_FATAL( LockNestableUsedAsSimple, func );
2806 }
2807 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2808 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002809 }
2810 __kmp_destroy_drdpa_lock( lck );
2811}
2812
2813
2814//
2815// nested drdpa ticket locks
2816//
2817
2818void
2819__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2820{
2821 KMP_DEBUG_ASSERT( gtid >= 0 );
2822
2823 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2824 lck->lk.depth_locked += 1;
2825 }
2826 else {
2827 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2828 KMP_MB();
2829 lck->lk.depth_locked = 1;
2830 KMP_MB();
2831 lck->lk.owner_id = gtid + 1;
2832 }
2833}
2834
2835static void
2836__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2837{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002838 char const * const func = "omp_set_nest_lock";
2839 if ( lck->lk.initialized != lck ) {
2840 KMP_FATAL( LockIsUninitialized, func );
2841 }
2842 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2843 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002844 }
2845 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2846}
2847
2848int
2849__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2850{
2851 int retval;
2852
2853 KMP_DEBUG_ASSERT( gtid >= 0 );
2854
2855 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2856 retval = ++lck->lk.depth_locked;
2857 }
2858 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2859 retval = 0;
2860 }
2861 else {
2862 KMP_MB();
2863 retval = lck->lk.depth_locked = 1;
2864 KMP_MB();
2865 lck->lk.owner_id = gtid + 1;
2866 }
2867 return retval;
2868}
2869
2870static int
2871__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2872{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002873 char const * const func = "omp_test_nest_lock";
2874 if ( lck->lk.initialized != lck ) {
2875 KMP_FATAL( LockIsUninitialized, func );
2876 }
2877 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2878 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002879 }
2880 return __kmp_test_nested_drdpa_lock( lck, gtid );
2881}
2882
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002883int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002884__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2885{
2886 KMP_DEBUG_ASSERT( gtid >= 0 );
2887
2888 KMP_MB();
2889 if ( --(lck->lk.depth_locked) == 0 ) {
2890 KMP_MB();
2891 lck->lk.owner_id = 0;
2892 __kmp_release_drdpa_lock( lck, gtid );
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002893 return KMP_LOCK_RELEASED;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002894 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002895 return KMP_LOCK_STILL_HELD;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002896}
2897
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002898static int
Jim Cownie5e8470a2013-09-27 10:38:44 +00002899__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2900{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002901 char const * const func = "omp_unset_nest_lock";
2902 KMP_MB(); /* in case another processor initialized lock */
2903 if ( lck->lk.initialized != lck ) {
2904 KMP_FATAL( LockIsUninitialized, func );
2905 }
2906 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2907 KMP_FATAL( LockSimpleUsedAsNestable, func );
2908 }
2909 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2910 KMP_FATAL( LockUnsettingFree, func );
2911 }
2912 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2913 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002914 }
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00002915 return __kmp_release_nested_drdpa_lock( lck, gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002916}
2917
2918void
2919__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2920{
2921 __kmp_init_drdpa_lock( lck );
2922 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2923}
2924
2925static void
2926__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2927{
2928 __kmp_init_nested_drdpa_lock( lck );
2929}
2930
2931void
2932__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2933{
2934 __kmp_destroy_drdpa_lock( lck );
2935 lck->lk.depth_locked = 0;
2936}
2937
2938static void
2939__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2940{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002941 char const * const func = "omp_destroy_nest_lock";
2942 if ( lck->lk.initialized != lck ) {
2943 KMP_FATAL( LockIsUninitialized, func );
2944 }
2945 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2946 KMP_FATAL( LockSimpleUsedAsNestable, func );
2947 }
2948 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2949 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002950 }
2951 __kmp_destroy_nested_drdpa_lock( lck );
2952}
2953
2954
2955//
2956// access functions to fields which don't exist for all lock kinds.
2957//
2958
2959static int
2960__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2961{
2962 return lck == lck->lk.initialized;
2963}
2964
2965static const ident_t *
2966__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2967{
2968 return lck->lk.location;
2969}
2970
2971static void
2972__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2973{
2974 lck->lk.location = loc;
2975}
2976
2977static kmp_lock_flags_t
2978__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2979{
2980 return lck->lk.flags;
2981}
2982
2983static void
2984__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2985{
2986 lck->lk.flags = flags;
2987}
2988
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002989#if KMP_USE_DYNAMIC_LOCK
2990
2991// Definitions of lock hints.
2992# ifndef __OMP_H
2993typedef enum kmp_lock_hint_t {
2994 kmp_lock_hint_none = 0,
2995 kmp_lock_hint_contended,
2996 kmp_lock_hint_uncontended,
2997 kmp_lock_hint_nonspeculative,
2998 kmp_lock_hint_speculative,
2999 kmp_lock_hint_adaptive,
3000} kmp_lock_hint_t;
3001# endif
3002
3003// Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word.
3004#define expand_init_lock(l, a) \
3005static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
3006 *lck = DYNA_LOCK_FREE(l); \
3007 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
3008}
3009FOREACH_D_LOCK(expand_init_lock, 0)
3010#undef expand_init_lock
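
// For reference, the expansion of expand_init_lock for a direct lock kind <l>
// is the following (preprocessor sketch only; no code beyond the expansion
// above is generated here):
//
//     static void init_<l>_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
//         *lck = DYNA_LOCK_FREE(<l>);
//         KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));
//     }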
3011
3012#if DYNA_HAS_HLE
3013
3014// HLE lock functions - imported from the testbed runtime.
3015#if KMP_MIC
3016# define machine_pause() _mm_delay_32(10) // TODO: find the right argument
3017#else
3018# define machine_pause() _mm_pause()
3019#endif
3020#define HLE_ACQUIRE ".byte 0xf2;"
3021#define HLE_RELEASE ".byte 0xf3;"
3022
3023static inline kmp_uint32
3024swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3025{
3026 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3027 : "+r"(v), "+m"(*p)
3028 :
3029 : "memory");
3030 return v;
3031}
3032
3033static void
3034__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3035{
3036 *lck = 0;
3037}
3038
3039static void
3040__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3041{
3042 // Use gtid for DYNA_LOCK_BUSY if necessary
3043 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
3044 int delay = 1;
3045 do {
3046 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
3047 for (int i = delay; i != 0; --i)
3048 machine_pause();
3049 delay = ((delay << 1) | 1) & 7;
3050 }
3051 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
3052 }
3053}
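
// Note on the backoff above (descriptive comment only): the pause count grows
// 1 -> 3 -> 7 and then stays at 7, because ((delay << 1) | 1) & 7 saturates at
// 7, and the elided xchg is only retried once the lock word reads free again,
// which keeps the cache line shared while the thread waits.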
3054
3055static void
3056__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3057{
3058 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3059}
3060
3061static void
3062__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3063{
3064 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3065 : "=m"(*lck)
3066 : "r"(DYNA_LOCK_FREE(hle))
3067 : "memory");
3068}
3069
3070static void
3071__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3072{
3073 __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3074}
3075
3076static int
3077__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3078{
3079 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3080}
3081
3082static int
3083__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3084{
3085 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3086}
3087
3088#endif // DYNA_HAS_HLE
3089
3090// Entry functions for indirect locks (first element of direct_*_ops[]).
3091static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3092static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3093static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3094static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3095static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3096static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3097static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3098static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3099
3100//
3101// Jump tables for the indirect lock functions.
3102// Only fill in the odd entries; that avoids the need to shift out the low bit.
3103//
3104#define expand_func0(l, op) 0,op##_##l##_##lock,
3105void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3106 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3107
3108#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3109void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3110 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3111
3112// Differentiates *lock and *lock_with_checks.
3113#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3114#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3115static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3116 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3117 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3118static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3119 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3120 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3121
3122#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3123#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3124static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3125 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3126 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3127
3128// Exposes only one set of jump tables (*lock or *lock_with_checks).
3129void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3130void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3131int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
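
// Note (inferred; the actual selection happens elsewhere in the runtime): these
// pointers are presumably aimed at row 0 (no checks) or row 1 (with checks) of
// the tables above once the runtime knows whether consistency checking is
// enabled, so each lock operation costs a single indirect call either way.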
3132
3133//
3134// Jump tables for the indirect lock functions.
3135//
3136#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3137void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3138 = { FOREACH_I_LOCK(expand_func4, init) };
3139void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3140 = { FOREACH_I_LOCK(expand_func4, destroy) };
3141
3142// Differentiates *lock and *lock_with_checks.
3143#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3144#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3145static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3146 = { { FOREACH_I_LOCK(expand_func5, acquire) },
3147 { FOREACH_I_LOCK(expand_func5c, acquire) } };
3148static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3149 = { { FOREACH_I_LOCK(expand_func5, release) },
3150 { FOREACH_I_LOCK(expand_func5c, release) } };
3151
3152#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3153#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3154static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3155 = { { FOREACH_I_LOCK(expand_func6, test) },
3156 { FOREACH_I_LOCK(expand_func6c, test) } };
3157
3158// Exposes only one set of jump tables (*lock or *lock_with_checks).
3159void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3160void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3161int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3162
3163// Lock index table.
3164kmp_indirect_lock_t **__kmp_indirect_lock_table;
3165kmp_lock_index_t __kmp_indirect_lock_table_size;
3166kmp_lock_index_t __kmp_indirect_lock_table_next;
3167
3168// Size of indirect locks.
3169static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3170 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3171#if KMP_USE_ADAPTIVE_LOCKS
3172 sizeof(kmp_adaptive_lock_t),
3173#endif
3174 sizeof(kmp_drdpa_lock_t),
3175 sizeof(kmp_tas_lock_t),
3176#if DYNA_HAS_FUTEX
3177 sizeof(kmp_futex_lock_t),
3178#endif
3179 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3180 sizeof(kmp_drdpa_lock_t)
3181};
3182
3183// Jump tables for lock accessor/modifier.
3184void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3185void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3186const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3187kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3188
3189// Use different lock pools for different lock types.
3190static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3191
3192// Inserts the given lock ptr into the lock table and returns its index.
3193kmp_lock_index_t
3194__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3195{
3196 kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3197 // Check capacity and double the size if required
3198 if (next >= __kmp_indirect_lock_table_size) {
3199 kmp_lock_index_t i;
3200 kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3201 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3202 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003203 KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00003204 __kmp_free(old_table);
3205 __kmp_indirect_lock_table_size = 2*next;
3206 }
3207 // Insert lck into the table and return its index.
3208 __kmp_indirect_lock_table[next] = lck;
3209 __kmp_indirect_lock_table_next++;
3210 return next;
3211}
3212
3213// User lock allocator for dynamically dispatched locks.
3214kmp_indirect_lock_t *
3215__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3216{
3217 kmp_indirect_lock_t *lck;
3218 kmp_lock_index_t idx;
3219
3220 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3221
3222 if (__kmp_indirect_lock_pool[tag] != NULL) {
3223 lck = __kmp_indirect_lock_pool[tag];
3224 if (OMP_LOCK_T_SIZE < sizeof(void *))
3225 idx = lck->lock->pool.index;
3226 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3227 } else {
3228 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3229 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3230 if (OMP_LOCK_T_SIZE < sizeof(void *))
3231 idx = __kmp_insert_indirect_lock(lck);
3232 }
3233
3234 __kmp_release_lock(&__kmp_global_lock, gtid);
3235
3236 lck->type = tag;
3237
3238 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3239 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3240 } else {
3241 *((kmp_indirect_lock_t **)user_lock) = lck;
3242 }
3243
3244 return lck;
3245}
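// A minimal sketch (not compiled in) of the addressing scheme used above: when
// the user's lock variable is too small to hold a pointer, the table index is
// stored shifted left by one so the word stays even and can be told apart from
// a direct lock word; DYNA_EXTRACT_I_INDEX is assumed to undo that shift. The
// same recovery logic appears in __kmp_lookup_indirect_lock() below.
#if 0
static kmp_indirect_lock_t *__example_recover_indirect(void **user_lock)
{
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
        kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);   // stored word >> 1 (assumed)
        return __kmp_indirect_lock_table[idx];
    }
    return *((kmp_indirect_lock_t **)user_lock);                  // the full pointer fits
}
#endif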
3246
3247// User lock lookup for dynamically dispatched locks.
3248static __forceinline
3249kmp_indirect_lock_t *
3250__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3251{
3252 if (__kmp_env_consistency_check) {
3253 kmp_indirect_lock_t *lck = NULL;
3254 if (user_lock == NULL) {
3255 KMP_FATAL(LockIsUninitialized, func);
3256 }
3257 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3258 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3259 if (idx >= __kmp_indirect_lock_table_size) { // idx is unsigned, so only the upper bound needs checking
3260 KMP_FATAL(LockIsUninitialized, func);
3261 }
3262 lck = __kmp_indirect_lock_table[idx];
3263 } else {
3264 lck = *((kmp_indirect_lock_t **)user_lock);
3265 }
3266 if (lck == NULL) {
3267 KMP_FATAL(LockIsUninitialized, func);
3268 }
3269 return lck;
3270 } else {
3271 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3272 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3273 } else {
3274 return *((kmp_indirect_lock_t **)user_lock);
3275 }
3276 }
3277}
3278
3279static void
3280__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3281{
3282#if KMP_USE_ADAPTIVE_LOCKS
3283 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3284 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3285 seq = lockseq_queuing;
3286 }
3287#endif
3288 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3289 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3290 DYNA_I_LOCK_FUNC(l, init)(l->lock);
3291 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3292}
3293
3294static void
3295__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3296{
3297 kmp_uint32 gtid = __kmp_entry_gtid();
3298 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3299 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3300 kmp_indirect_locktag_t tag = l->type;
3301
3302 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3303
3304 // Use the base lock's space to keep the pool chain.
3305 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3306 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3307 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3308 }
3309 __kmp_indirect_lock_pool[tag] = l;
3310
3311 __kmp_release_lock(&__kmp_global_lock, gtid);
3312}
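// A minimal sketch (not compiled in) of the pool round trip described above:
// destroying an indirect lock does not free its memory, it is pushed onto the
// per-tag pool, and the next allocation with the same tag pops it back off so
// both the kmp_indirect_lock_t and its lock storage are reused. locktag_ticket
// is just one example tag.
#if 0
static void __example_pool_reuse(void **lock_space, kmp_int32 gtid)
{
    kmp_indirect_lock_t *l1 = __kmp_allocate_indirect_lock(lock_space, gtid, locktag_ticket);
    // ... if the lock is later destroyed, it becomes the head of
    // __kmp_indirect_lock_pool[locktag_ticket] ...
    kmp_indirect_lock_t *l2 = __kmp_allocate_indirect_lock(lock_space, gtid, locktag_ticket);
    // ... and a subsequent allocation is expected to return the same object (l2 == l1).
}
#endif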
3313
3314static void
3315__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3316{
3317 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3318 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3319}
3320
3321static void
3322__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3323{
3324 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3325 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3326}
3327
3328static int
3329__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3330{
3331 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3332 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3333}
3334
3335static void
3336__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3337{
3338 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3339 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3340}
3341
3342static void
3343__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3344{
3345 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3346 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3347}
3348
3349static int
3350__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3351{
3352 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3353 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3354}
3355
3356kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3357
3358// Initialize a hinted lock.
3359void
3360__kmp_init_lock_hinted(void **lock, int hint)
3361{
3362 kmp_dyna_lockseq_t seq;
3363 switch (hint) {
3364 case kmp_lock_hint_uncontended:
3365 seq = lockseq_tas;
3366 break;
3367 case kmp_lock_hint_speculative:
3368#if DYNA_HAS_HLE
3369 seq = lockseq_hle;
3370#else
3371 seq = lockseq_tas;
3372#endif
3373 break;
3374 case kmp_lock_hint_adaptive:
3375#if KMP_USE_ADAPTIVE_LOCKS
3376 seq = lockseq_adaptive;
3377#else
3378 seq = lockseq_queuing;
3379#endif
3380 break;
3381 // Defaults to queuing locks.
3382 case kmp_lock_hint_contended:
3383 case kmp_lock_hint_nonspeculative:
3384 default:
3385 seq = lockseq_queuing;
3386 break;
3387 }
3388 if (DYNA_IS_D_LOCK(seq)) {
3389 DYNA_INIT_D_LOCK(lock, seq);
3390#if USE_ITT_BUILD
3391 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3392#endif
3393 } else {
3394 DYNA_INIT_I_LOCK(lock, seq);
3395#if USE_ITT_BUILD
3396 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3397 __kmp_itt_lock_creating(ilk->lock, NULL);
3398#endif
3399 }
3400}
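// A minimal usage sketch (not compiled in) for the hint mapping above: an
// uncontended hint selects a TAS lock, a speculative hint selects HLE when it
// is available, and everything else falls back to the queuing lock. The lock
// storage here is hypothetical; callers normally pass the address of the
// user's lock variable.
#if 0
static void __example_hinted_lock(void)
{
    void *lock_space = NULL;  // stand-in for the user's lock variable
    __kmp_init_lock_hinted(&lock_space, kmp_lock_hint_speculative);
    // The lock word is now set up for dispatch through the jump tables above.
}
#endif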
3401
3402// This is used only in kmp_error.c when consistency checking is on.
3403kmp_int32
3404__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3405{
3406 switch (seq) {
3407 case lockseq_tas:
3408 case lockseq_nested_tas:
3409 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3410#if DYNA_HAS_FUTEX
3411 case lockseq_futex:
3412 case lockseq_nested_futex:
3413 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3414#endif
3415 case lockseq_ticket:
3416 case lockseq_nested_ticket:
3417 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3418 case lockseq_queuing:
3419 case lockseq_nested_queuing:
3420#if KMP_USE_ADAPTIVE_LOCKS
3421 case lockseq_adaptive:
3422 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3423#endif
3424 case lockseq_drdpa:
3425 case lockseq_nested_drdpa:
3426 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3427 default:
3428 return 0;
3429 }
3430}
3431
3432// The value initialized from KMP_LOCK_KIND needs to be translated to its
3433// nested version.
3434void
3435__kmp_init_nest_lock_hinted(void **lock, int hint)
3436{
3437 kmp_dyna_lockseq_t seq;
3438 switch (hint) {
3439 case kmp_lock_hint_uncontended:
3440 seq = lockseq_nested_tas;
3441 break;
3442 // Defaults to queuing locks.
3443 case kmp_lock_hint_contended:
3444 case kmp_lock_hint_nonspeculative:
3445 default:
3446 seq = lockseq_nested_queuing;
3447 break;
3448 }
3449 DYNA_INIT_I_LOCK(lock, seq);
3450#if USE_ITT_BUILD
3451 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3452 __kmp_itt_lock_creating(ilk->lock, NULL);
3453#endif
3454}
3455
3456// Initializes the lock table for indirect locks.
3457static void
3458__kmp_init_indirect_lock_table()
3459{
3460 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3461 __kmp_indirect_lock_table_size = 1024;
3462 __kmp_indirect_lock_table_next = 0;
3463}
3464
3465#if KMP_USE_ADAPTIVE_LOCKS
3466# define init_lock_func(table, expand) { \
3467 table[locktag_ticket] = expand(ticket); \
3468 table[locktag_queuing] = expand(queuing); \
3469 table[locktag_adaptive] = expand(queuing); \
3470 table[locktag_drdpa] = expand(drdpa); \
3471 table[locktag_nested_ticket] = expand(ticket); \
3472 table[locktag_nested_queuing] = expand(queuing); \
3473 table[locktag_nested_drdpa] = expand(drdpa); \
3474}
3475#else
3476# define init_lock_func(table, expand) { \
3477 table[locktag_ticket] = expand(ticket); \
3478 table[locktag_queuing] = expand(queuing); \
3479 table[locktag_drdpa] = expand(drdpa); \
3480 table[locktag_nested_ticket] = expand(ticket); \
3481 table[locktag_nested_queuing] = expand(queuing); \
3482 table[locktag_nested_drdpa] = expand(drdpa); \
3483}
3484#endif // KMP_USE_ADAPTIVE_LOCKS
3485
3486// Initializes data for dynamic user locks.
3487void
3488__kmp_init_dynamic_user_locks()
3489{
3490 // Initialize jump table location
3491 int offset = (__kmp_env_consistency_check)? 1: 0;
3492 __kmp_direct_set_ops = direct_set_tab[offset];
3493 __kmp_direct_unset_ops = direct_unset_tab[offset];
3494 __kmp_direct_test_ops = direct_test_tab[offset];
3495 __kmp_indirect_set_ops = indirect_set_tab[offset];
3496 __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3497 __kmp_indirect_test_ops = indirect_test_tab[offset];
3498 __kmp_init_indirect_lock_table();
3499
3500 // Initialize lock accessor/modifier
3501 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3502#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3503 init_lock_func(__kmp_indirect_set_location, expand_func);
3504#undef expand_func
3505#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3506 init_lock_func(__kmp_indirect_set_flags, expand_func);
3507#undef expand_func
3508#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3509 init_lock_func(__kmp_indirect_get_location, expand_func);
3510#undef expand_func
3511#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3512 init_lock_func(__kmp_indirect_get_flags, expand_func);
3513#undef expand_func
3514
3515 __kmp_init_user_locks = TRUE;
3516}
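// For reference, a minimal sketch (not compiled in) of what one
// init_lock_func(..., expand_func) use above expands to: one plain assignment
// per supported lock kind, with a cast because each lock kind has its own
// function signature. Only the first two entries are shown.
#if 0
static void __example_init_lock_func_expansion(void)
{
    __kmp_indirect_set_location[locktag_ticket]
        = (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
    __kmp_indirect_set_location[locktag_queuing]
        = (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_queuing_lock_location;
    // ... and so on for drdpa and the nested tags ...
}
#endif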
3517
3518// Clean up the lock table.
3519void
3520__kmp_cleanup_indirect_user_locks()
3521{
3522 kmp_lock_index_t i;
3523 int k;
3524
3525 // Clean up locks in the pools first (they were already destroyed before going into the pools).
3526 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3527 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3528 while (l != NULL) {
3529 kmp_indirect_lock_t *ll = l;
3530 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3531 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3532 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3533 }
3534 __kmp_free(ll->lock);
3535 __kmp_free(ll);
3536 }
3537 }
3538 // Clean up the remaining undestroyed locks.
3539 for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3540 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3541 if (l != NULL) {
3542 // Locks not destroyed explicitly need to be destroyed here.
3543 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3544 __kmp_free(l->lock);
3545 __kmp_free(l);
3546 }
3547 }
3548 // Free the table
3549 __kmp_free(__kmp_indirect_lock_table);
3550
3551 __kmp_init_user_locks = FALSE;
3552}
3553
3554enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3555int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3556
3557#else // KMP_USE_DYNAMIC_LOCK
3558
Jim Cownie5e8470a2013-09-27 10:38:44 +00003559/* ------------------------------------------------------------------------ */
3560/* user locks
3561 *
3562 * They are implemented as a table of function pointers which are set to the
3563 * lock functions of the appropriate kind, once that has been determined.
3564 */
3565
3566enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3567
3568size_t __kmp_base_user_lock_size = 0;
3569size_t __kmp_user_lock_size = 0;
3570
3571kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3572void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3573
3574int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003575int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003576void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3577void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3578void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3579void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3580
3581int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Andrey Churbanov8d09fac2015-04-29 15:52:19 +00003582int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00003583void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3584void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3585
3586int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3587const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3588void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3589kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3590void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3591
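// A minimal sketch (not compiled in) of how the function pointers above are
// used: __kmp_set_user_lock_vptrs() (defined next) binds them to one lock
// kind, after which generic code can operate on any user lock through the
// pointers without knowing the concrete lock type. lk_ticket is just one
// example kind.
#if 0
static void __example_user_lock_vptrs(kmp_user_lock_p lck, kmp_int32 gtid)
{
    __kmp_set_user_lock_vptrs(lk_ticket);                 // bind to the ticket-lock implementation
    (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);   // dispatches to the ticket acquire
    (*__kmp_release_user_lock_with_checks_)(lck, gtid);   // dispatches to the ticket release
}
#endif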
3592void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3593{
3594 switch ( user_lock_kind ) {
3595 case lk_default:
3596 default:
3597 KMP_ASSERT( 0 );
3598
3599 case lk_tas: {
3600 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3601 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3602
3603 __kmp_get_user_lock_owner_ =
3604 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3605 ( &__kmp_get_tas_lock_owner );
3606
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003607 if ( __kmp_env_consistency_check ) {
3608 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3609 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3610 }
3611 else {
3612 KMP_BIND_USER_LOCK(tas);
3613 KMP_BIND_NESTED_USER_LOCK(tas);
3614 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003615
3616 __kmp_destroy_user_lock_ =
3617 ( void ( * )( kmp_user_lock_p ) )
3618 ( &__kmp_destroy_tas_lock );
3619
Jim Cownie5e8470a2013-09-27 10:38:44 +00003620 __kmp_is_user_lock_initialized_ =
3621 ( int ( * )( kmp_user_lock_p ) ) NULL;
3622
3623 __kmp_get_user_lock_location_ =
3624 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3625
3626 __kmp_set_user_lock_location_ =
3627 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3628
3629 __kmp_get_user_lock_flags_ =
3630 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3631
3632 __kmp_set_user_lock_flags_ =
3633 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3634 }
3635 break;
3636
Jim Cownie181b4bb2013-12-23 17:28:57 +00003637#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003638
3639 case lk_futex: {
3640 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3641 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3642
3643 __kmp_get_user_lock_owner_ =
3644 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3645 ( &__kmp_get_futex_lock_owner );
3646
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003647 if ( __kmp_env_consistency_check ) {
3648 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3649 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3650 }
3651 else {
3652 KMP_BIND_USER_LOCK(futex);
3653 KMP_BIND_NESTED_USER_LOCK(futex);
3654 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003655
3656 __kmp_destroy_user_lock_ =
3657 ( void ( * )( kmp_user_lock_p ) )
3658 ( &__kmp_destroy_futex_lock );
3659
Jim Cownie5e8470a2013-09-27 10:38:44 +00003660 __kmp_is_user_lock_initialized_ =
3661 ( int ( * )( kmp_user_lock_p ) ) NULL;
3662
3663 __kmp_get_user_lock_location_ =
3664 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3665
3666 __kmp_set_user_lock_location_ =
3667 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3668
3669 __kmp_get_user_lock_flags_ =
3670 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3671
3672 __kmp_set_user_lock_flags_ =
3673 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3674 }
3675 break;
3676
Jim Cownie181b4bb2013-12-23 17:28:57 +00003677#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003678
3679 case lk_ticket: {
3680 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3681 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3682
3683 __kmp_get_user_lock_owner_ =
3684 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3685 ( &__kmp_get_ticket_lock_owner );
3686
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003687 if ( __kmp_env_consistency_check ) {
3688 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3689 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3690 }
3691 else {
3692 KMP_BIND_USER_LOCK(ticket);
3693 KMP_BIND_NESTED_USER_LOCK(ticket);
3694 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003695
3696 __kmp_destroy_user_lock_ =
3697 ( void ( * )( kmp_user_lock_p ) )
3698 ( &__kmp_destroy_ticket_lock );
3699
Jim Cownie5e8470a2013-09-27 10:38:44 +00003700 __kmp_is_user_lock_initialized_ =
3701 ( int ( * )( kmp_user_lock_p ) )
3702 ( &__kmp_is_ticket_lock_initialized );
3703
3704 __kmp_get_user_lock_location_ =
3705 ( const ident_t * ( * )( kmp_user_lock_p ) )
3706 ( &__kmp_get_ticket_lock_location );
3707
3708 __kmp_set_user_lock_location_ =
3709 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3710 ( &__kmp_set_ticket_lock_location );
3711
3712 __kmp_get_user_lock_flags_ =
3713 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3714 ( &__kmp_get_ticket_lock_flags );
3715
3716 __kmp_set_user_lock_flags_ =
3717 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3718 ( &__kmp_set_ticket_lock_flags );
3719 }
3720 break;
3721
3722 case lk_queuing: {
3723 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3724 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3725
3726 __kmp_get_user_lock_owner_ =
3727 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3728 ( &__kmp_get_queuing_lock_owner );
3729
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003730 if ( __kmp_env_consistency_check ) {
3731 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3732 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3733 }
3734 else {
3735 KMP_BIND_USER_LOCK(queuing);
3736 KMP_BIND_NESTED_USER_LOCK(queuing);
3737 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003738
3739 __kmp_destroy_user_lock_ =
3740 ( void ( * )( kmp_user_lock_p ) )
3741 ( &__kmp_destroy_queuing_lock );
3742
Jim Cownie5e8470a2013-09-27 10:38:44 +00003743 __kmp_is_user_lock_initialized_ =
3744 ( int ( * )( kmp_user_lock_p ) )
3745 ( &__kmp_is_queuing_lock_initialized );
3746
3747 __kmp_get_user_lock_location_ =
3748 ( const ident_t * ( * )( kmp_user_lock_p ) )
3749 ( &__kmp_get_queuing_lock_location );
3750
3751 __kmp_set_user_lock_location_ =
3752 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3753 ( &__kmp_set_queuing_lock_location );
3754
3755 __kmp_get_user_lock_flags_ =
3756 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3757 ( &__kmp_get_queuing_lock_flags );
3758
3759 __kmp_set_user_lock_flags_ =
3760 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3761 ( &__kmp_set_queuing_lock_flags );
3762 }
3763 break;
3764
3765#if KMP_USE_ADAPTIVE_LOCKS
3766 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003767 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3768 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003769
3770 __kmp_get_user_lock_owner_ =
3771 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3772 ( &__kmp_get_queuing_lock_owner );
3773
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003774 if ( __kmp_env_consistency_check ) {
3775 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3776 }
3777 else {
3778 KMP_BIND_USER_LOCK(adaptive);
3779 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003780
3781 __kmp_destroy_user_lock_ =
3782 ( void ( * )( kmp_user_lock_p ) )
3783 ( &__kmp_destroy_adaptive_lock );
3784
3785 __kmp_is_user_lock_initialized_ =
3786 ( int ( * )( kmp_user_lock_p ) )
3787 ( &__kmp_is_queuing_lock_initialized );
3788
3789 __kmp_get_user_lock_location_ =
3790 ( const ident_t * ( * )( kmp_user_lock_p ) )
3791 ( &__kmp_get_queuing_lock_location );
3792
3793 __kmp_set_user_lock_location_ =
3794 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3795 ( &__kmp_set_queuing_lock_location );
3796
3797 __kmp_get_user_lock_flags_ =
3798 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3799 ( &__kmp_get_queuing_lock_flags );
3800
3801 __kmp_set_user_lock_flags_ =
3802 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3803 ( &__kmp_set_queuing_lock_flags );
3804
3805 }
3806 break;
3807#endif // KMP_USE_ADAPTIVE_LOCKS
3808
3809 case lk_drdpa: {
3810 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3811 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3812
3813 __kmp_get_user_lock_owner_ =
3814 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3815 ( &__kmp_get_drdpa_lock_owner );
3816
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003817 if ( __kmp_env_consistency_check ) {
3818 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3819 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3820 }
3821 else {
3822 KMP_BIND_USER_LOCK(drdpa);
3823 KMP_BIND_NESTED_USER_LOCK(drdpa);
3824 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003825
3826 __kmp_destroy_user_lock_ =
3827 ( void ( * )( kmp_user_lock_p ) )
3828 ( &__kmp_destroy_drdpa_lock );
3829
Jim Cownie5e8470a2013-09-27 10:38:44 +00003830 __kmp_is_user_lock_initialized_ =
3831 ( int ( * )( kmp_user_lock_p ) )
3832 ( &__kmp_is_drdpa_lock_initialized );
3833
3834 __kmp_get_user_lock_location_ =
3835 ( const ident_t * ( * )( kmp_user_lock_p ) )
3836 ( &__kmp_get_drdpa_lock_location );
3837
3838 __kmp_set_user_lock_location_ =
3839 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3840 ( &__kmp_set_drdpa_lock_location );
3841
3842 __kmp_get_user_lock_flags_ =
3843 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3844 ( &__kmp_get_drdpa_lock_flags );
3845
3846 __kmp_set_user_lock_flags_ =
3847 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3848 ( &__kmp_set_drdpa_lock_flags );
3849 }
3850 break;
3851 }
3852}
3853
3854
3855// ----------------------------------------------------------------------------
3856// User lock table & lock allocation
3857
3858kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3859kmp_user_lock_p __kmp_lock_pool = NULL;
3860
3861// Lock block-allocation support.
3862kmp_block_of_locks* __kmp_lock_blocks = NULL;
3863int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3864
3865static kmp_lock_index_t
3866__kmp_lock_table_insert( kmp_user_lock_p lck )
3867{
3868 // Assume that kmp_global_lock is held upon entry/exit.
3869 kmp_lock_index_t index;
3870 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3871 kmp_lock_index_t size;
3872 kmp_user_lock_p *table;
3873 kmp_lock_index_t i;
3874 // Reallocate lock table.
3875 if ( __kmp_user_lock_table.allocated == 0 ) {
3876 size = 1024;
3877 }
3878 else {
3879 size = __kmp_user_lock_table.allocated * 2;
3880 }
3881 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
Andrey Churbanov74bf17b2015-04-02 13:27:08 +00003882 KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003883 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
Jonathan Peyton1406f012015-05-22 22:35:51 +00003884 // We cannot free the previous table now, since it may be in use by other
Jim Cownie5e8470a2013-09-27 10:38:44 +00003885 // threads. So save the pointer to the previous table in the first element of the
3886 // new table. All the tables will be organized into a list, and can be freed when
3887 // the library is shutting down.
3888 __kmp_user_lock_table.table = table;
3889 __kmp_user_lock_table.allocated = size;
3890 }
3891 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3892 index = __kmp_user_lock_table.used;
3893 __kmp_user_lock_table.table[ index ] = lck;
3894 ++ __kmp_user_lock_table.used;
3895 return index;
3896}
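// A minimal sketch (not compiled in) of the layout this produces: entry [0] of
// every reallocated table stores the previous (smaller) table, so all tables
// form a singly linked list. __kmp_cleanup_user_locks() below walks the same
// chain to free them at shutdown.
#if 0
static void __example_walk_lock_tables(void)
{
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    while (table_ptr != NULL) {
        // Entries [1 .. used-1] of the newest table are the live locks;
        // entry [0] links to the table that was in use before the last resize.
        kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
        table_ptr = next;
    }
}
#endif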
3897
3898static kmp_user_lock_p
3899__kmp_lock_block_allocate()
3900{
3901 // Assume that kmp_global_lock is held upon entry/exit.
3902 static int last_index = 0;
3903 if ( ( last_index >= __kmp_num_locks_in_block )
3904 || ( __kmp_lock_blocks == NULL ) ) {
3905 // Restart the index.
3906 last_index = 0;
3907 // Need to allocate a new block.
3908 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3909 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3910 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3911 // Set up the new block.
3912 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3913 new_block->next_block = __kmp_lock_blocks;
3914 new_block->locks = (void *)buffer;
3915 // Publish the new block.
3916 KMP_MB();
3917 __kmp_lock_blocks = new_block;
3918 }
3919 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3920 [ last_index * __kmp_user_lock_size ] ) );
3921 last_index++;
3922 return ret;
3923}
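// A minimal sketch (not compiled in) of the memory layout created above: a
// single allocation holds __kmp_num_locks_in_block lock slots followed by the
// kmp_block_of_locks bookkeeping record, so slot k of a block sits at a fixed
// offset from block->locks.
#if 0
static kmp_user_lock_p __example_lock_in_block(kmp_block_of_locks *block, int k)
{
    KMP_DEBUG_ASSERT(k >= 0 && k < __kmp_num_locks_in_block);
    return (kmp_user_lock_p)((char *)(block->locks) + k * __kmp_user_lock_size);
}
#endif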
3924
3925//
3926// Get memory for a lock. It may be freshly allocated memory or reused memory
3927 // from the lock pool.
3928//
3929kmp_user_lock_p
3930__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3931 kmp_lock_flags_t flags )
3932{
3933 kmp_user_lock_p lck;
3934 kmp_lock_index_t index;
3935 KMP_DEBUG_ASSERT( user_lock );
3936
3937 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3938
3939 if ( __kmp_lock_pool == NULL ) {
3940 // Lock pool is empty. Allocate new memory.
3941 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3942 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3943 }
3944 else {
3945 lck = __kmp_lock_block_allocate();
3946 }
3947
3948 // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3949 // and so the debugger has info on all allocated locks.
3950 index = __kmp_lock_table_insert( lck );
3951 }
3952 else {
3953 // Pick up lock from pool.
3954 lck = __kmp_lock_pool;
3955 index = __kmp_lock_pool->pool.index;
3956 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3957 }
3958
3959 //
3960 // We could potentially differentiate between nested and regular locks
3961 // here, and do the lock table lookup for regular locks only.
3962 //
3963 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3964 * ( (kmp_lock_index_t *) user_lock ) = index;
3965 }
3966 else {
3967 * ( (kmp_user_lock_p *) user_lock ) = lck;
3968 }
3969
3970 // Mark the lock if it is a critical section lock.
3971 __kmp_set_user_lock_flags( lck, flags );
3972
3973 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3974
3975 return lck;
3976}
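// A minimal sketch (not compiled in) of the inverse mapping: the caller's lock
// variable holds either a lock-table index (when omp_lock_t is smaller than a
// pointer) or the lock pointer itself. __kmp_lookup_user_lock() below performs
// exactly this dereference, plus the consistency checks.
#if 0
static kmp_user_lock_p __example_user_lock_deref(void **user_lock)
{
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
        kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
        return __kmp_user_lock_table.table[index];   // index 0 is reserved
    }
    return *((kmp_user_lock_p *)user_lock);
}
#endif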
3977
3978 // Return the lock's memory to the pool for reuse.
3979void
3980__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3981{
3982 kmp_lock_pool_t * lock_pool;
3983
3984 KMP_DEBUG_ASSERT( user_lock != NULL );
3985 KMP_DEBUG_ASSERT( lck != NULL );
3986
3987 __kmp_acquire_lock( & __kmp_global_lock, gtid );
3988
3989 lck->pool.next = __kmp_lock_pool;
3990 __kmp_lock_pool = lck;
3991 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3992 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3993 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3994 lck->pool.index = index;
3995 }
3996
3997 __kmp_release_lock( & __kmp_global_lock, gtid );
3998}
3999
4000kmp_user_lock_p
4001__kmp_lookup_user_lock( void **user_lock, char const *func )
4002{
4003 kmp_user_lock_p lck = NULL;
4004
4005 if ( __kmp_env_consistency_check ) {
4006 if ( user_lock == NULL ) {
4007 KMP_FATAL( LockIsUninitialized, func );
4008 }
4009 }
4010
4011 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4012 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4013 if ( __kmp_env_consistency_check ) {
4014 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4015 KMP_FATAL( LockIsUninitialized, func );
4016 }
4017 }
4018 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4019 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4020 lck = __kmp_user_lock_table.table[index];
4021 }
4022 else {
4023 lck = *( (kmp_user_lock_p *)user_lock );
4024 }
4025
4026 if ( __kmp_env_consistency_check ) {
4027 if ( lck == NULL ) {
4028 KMP_FATAL( LockIsUninitialized, func );
4029 }
4030 }
4031
4032 return lck;
4033}
4034
4035void
4036__kmp_cleanup_user_locks( void )
4037{
4038 //
4039 // Reset the lock pool. Do not worry about the locks in the pool -- we will free
4040 // them when iterating through the lock table (it includes all the locks,
4041 // dead or alive).
4042 //
4043 __kmp_lock_pool = NULL;
4044
4045#define IS_CRITICAL(lck) \
4046 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4047 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4048
4049 //
4050 // Loop through lock table, free all locks.
4051 //
4052 // Do not free item [0]; it is reserved for the lock tables list.
4053 //
4054 // FIXME - we are iterating through a list of (pointers to) objects of
4055 // type union kmp_user_lock, but we have no way of knowing whether the
4056 // base type is currently "pool" or whatever the global user lock type
4057 // is.
4058 //
4059 // We are relying on the fact that for all of the user lock types
4060 // (except "tas"), the first field in the lock struct is the "initialized"
4061 // field, which is set to the address of the lock object itself when
4062 // the lock is initialized. When the union is of type "pool", the
4063 // first field is a pointer to the next object in the free list, which
4064 // will not be the same address as the object itself.
4065 //
4066 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4067 // will fail for "pool" objects on the free list. This must happen as
4068 // the "location" field of real user locks overlaps the "index" field
4069 // of "pool" objects.
4070 //
4071 // It would be better to run through the free list, and remove all "pool"
4072 // objects from the lock table before executing this loop. However,
4073 // "pool" objects do not always have their index field set (only on
4074 // lin_32e), and I don't want to search the lock table for the address
4075 // of every "pool" object on the free list.
4076 //
4077 while ( __kmp_user_lock_table.used > 1 ) {
4078 const ident *loc;
4079
4080 //
4081 // Reduce __kmp_user_lock_table.used before freeing the lock,
4082 // so that the state of the locks stays consistent.
4083 //
4084 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4085 --__kmp_user_lock_table.used ];
4086
4087 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4088 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4089 //
4090 // Issue a warning if: KMP_CONSISTENCY_CHECK is on AND the lock is
4091 // initialized AND it is NOT a critical section (the user is not
4092 // responsible for destroying criticals) AND we know the source
4093 // location to report.
4094 //
4095 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4096 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4097 ( loc->psource != NULL ) ) {
4098 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
Andrey Churbanove8595de2015-02-20 18:19:41 +00004099 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
Jim Cownie5e8470a2013-09-27 10:38:44 +00004100 __kmp_str_loc_free( &str_loc);
4101 }
4102
4103#ifdef KMP_DEBUG
4104 if ( IS_CRITICAL( lck ) ) {
4105 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4106 }
4107 else {
4108 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4109 }
4110#endif // KMP_DEBUG
4111
4112 //
4113 // Clean up the lock's internal dynamic resources
4114 // (particularly for drdpa locks).
4115 //
4116 __kmp_destroy_user_lock( lck );
4117 }
4118
4119 //
4120 // Free the lock if block allocation of locks is not used.
4121 //
4122 if ( __kmp_lock_blocks == NULL ) {
4123 __kmp_free( lck );
4124 }
4125 }
4126
4127#undef IS_CRITICAL
4128
4129 //
4130 // delete lock table(s).
4131 //
4132 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4133 __kmp_user_lock_table.table = NULL;
4134 __kmp_user_lock_table.allocated = 0;
4135
4136 while ( table_ptr != NULL ) {
4137 //
4138 // In the first element we saved the pointer to the previous
4139 // (smaller) lock table.
4140 //
4141 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4142 __kmp_free( table_ptr );
4143 table_ptr = next;
4144 }
4145
4146 //
4147 // Free buffers allocated for blocks of locks.
4148 //
4149 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4150 __kmp_lock_blocks = NULL;
4151
4152 while ( block_ptr != NULL ) {
4153 kmp_block_of_locks_t *next = block_ptr->next_block;
4154 __kmp_free( block_ptr->locks );
4155 //
4156 // *block_ptr itself was allocated at the end of the locks vector.
4157 //
4158 block_ptr = next;
4159 }
4160
4161 TCW_4(__kmp_init_user_locks, FALSE);
4162}
4163
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00004164#endif // KMP_USE_DYNAMIC_LOCK