/*
 * kmp_lock.cpp -- lock-related functions
 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include <stddef.h>
17
18#include "kmp.h"
19#include "kmp_itt.h"
20#include "kmp_i18n.h"
21#include "kmp_lock.h"
22#include "kmp_io.h"
23
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
26# include <sys/syscall.h>
27// We should really include <futex.h>, but that causes compatibility problems on different
28// Linux* OS distributions that either require that you include (or break when you try to include)
29// <pci/types.h>.
30// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
31// we just define the constants here and don't include <futex.h>
32# ifndef FUTEX_WAIT
33# define FUTEX_WAIT 0
34# endif
35# ifndef FUTEX_WAKE
36# define FUTEX_WAKE 1
37# endif
38#endif
39
/* Implement spin locks for internal library use. */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
42
43void
44__kmp_validate_locks( void )
45{
46 int i;
47 kmp_uint32 x, y;
48
    /* Check to make sure unsigned arithmetic wraps properly */
50 x = ~((kmp_uint32) 0) - 2;
51 y = x - 2;
52
53 for (i = 0; i < 8; ++i, ++x, ++y) {
54 kmp_uint32 z = (x - y);
55 KMP_ASSERT( z == 2 );
56 }
57
58 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59}
60
61
62/* ------------------------------------------------------------------------ */
63/* test and set locks */
64
65//
66// For the non-nested locks, we can only assume that the first 4 bytes were
67// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68// compiler only allocates a 4 byte pointer on IA-32 architecture. On
69// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70//
71// gcc reserves >= 8 bytes for nested locks, so we can assume that the
72// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73//
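//
// The lock word itself (lk.poll) uses a simple encoding: 0 means the lock is
// free, and gtid + 1 means it is held by the thread with global thread id
// gtid (the +1 keeps thread 0 distinct from the unlocked state, which is why
// __kmp_get_tas_lock_owner() below returns poll - 1).
//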
74
75static kmp_int32
76__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77{
78 return TCR_4( lck->lk.poll ) - 1;
79}
80
81static inline bool
82__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83{
84 return lck->lk.depth_locked != -1;
85}
86
87__forceinline static void
88__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89{
90 KMP_MB();
91
92#ifdef USE_LOCK_PROFILE
93 kmp_uint32 curr = TCR_4( lck->lk.poll );
94 if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96 /* else __kmp_printf( "." );*/
97#endif /* USE_LOCK_PROFILE */
98
99 if ( ( lck->lk.poll == 0 )
100 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
101 KMP_FSYNC_ACQUIRED(lck);
102 return;
103 }
104
105 kmp_uint32 spins;
106 KMP_FSYNC_PREPARE( lck );
107 KMP_INIT_YIELD( spins );
108 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109 __kmp_xproc ) ) {
110 KMP_YIELD( TRUE );
111 }
112 else {
113 KMP_YIELD_SPIN( spins );
114 }
115
116 while ( ( lck->lk.poll != 0 ) ||
117 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
118 //
119 // FIXME - use exponential backoff here
120 //
121 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122 __kmp_xproc ) ) {
123 KMP_YIELD( TRUE );
124 }
125 else {
126 KMP_YIELD_SPIN( spins );
127 }
128 }
129 KMP_FSYNC_ACQUIRED( lck );
130}
131
132void
133__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
134{
135 __kmp_acquire_tas_lock_timed_template( lck, gtid );
136}
137
138static void
139__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
140{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
149 __kmp_acquire_tas_lock( lck, gtid );
150}
151
152int
153__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
154{
155 if ( ( lck->lk.poll == 0 )
156 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
157 KMP_FSYNC_ACQUIRED( lck );
158 return TRUE;
159 }
160 return FALSE;
161}
162
163static int
164__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
165{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
171 return __kmp_test_tas_lock( lck, gtid );
172}
173
174void
175__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
176{
177 KMP_MB(); /* Flush all pending memory write invalidates. */
178
179 KMP_FSYNC_RELEASING(lck);
180 KMP_ST_REL32( &(lck->lk.poll), 0 );
181
182 KMP_MB(); /* Flush all pending memory write invalidates. */
183
184 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
185 __kmp_xproc ) );
186}
187
188static void
189__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
190{
    char const * const func = "omp_unset_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
204 __kmp_release_tas_lock( lck, gtid );
205}
206
207void
208__kmp_init_tas_lock( kmp_tas_lock_t * lck )
209{
210 TCW_4( lck->lk.poll, 0 );
211}
212
213static void
214__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
215{
216 __kmp_init_tas_lock( lck );
217}
218
219void
220__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
221{
222 lck->lk.poll = 0;
223}
224
225static void
226__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
227{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
236 __kmp_destroy_tas_lock( lck );
237}
238
239
240//
241// nested test and set locks
242//
243
244void
245__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
246{
247 KMP_DEBUG_ASSERT( gtid >= 0 );
248
249 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
250 lck->lk.depth_locked += 1;
251 }
252 else {
253 __kmp_acquire_tas_lock_timed_template( lck, gtid );
254 lck->lk.depth_locked = 1;
255 }
256}
257
258static void
259__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
260{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
265 __kmp_acquire_nested_tas_lock( lck, gtid );
266}
267
268int
269__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
270{
271 int retval;
272
273 KMP_DEBUG_ASSERT( gtid >= 0 );
274
275 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
276 retval = ++lck->lk.depth_locked;
277 }
278 else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
279 retval = 0;
280 }
281 else {
282 KMP_MB();
283 retval = lck->lk.depth_locked = 1;
284 }
285 return retval;
286}
287
288static int
289__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
290{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
295 return __kmp_test_nested_tas_lock( lck, gtid );
296}
297
298void
299__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
300{
301 KMP_DEBUG_ASSERT( gtid >= 0 );
302
303 KMP_MB();
304 if ( --(lck->lk.depth_locked) == 0 ) {
305 __kmp_release_tas_lock( lck, gtid );
306 }
307}
308
309static void
310__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
311{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
323 __kmp_release_nested_tas_lock( lck, gtid );
324}
325
326void
327__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
328{
329 __kmp_init_tas_lock( lck );
330 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
331}
332
333static void
334__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
335{
336 __kmp_init_nested_tas_lock( lck );
337}
338
339void
340__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
341{
342 __kmp_destroy_tas_lock( lck );
343 lck->lk.depth_locked = 0;
344}
345
346static void
347__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
348{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
356 __kmp_destroy_nested_tas_lock( lck );
357}
358
359
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

362/* ------------------------------------------------------------------------ */
363/* futex locks */
364
365// futex locks are really just test and set locks, with a different method
366// of handling contention. They take the same amount of space as test and
367// set locks, and are allocated the same way (i.e. use the area allocated by
368// the compiler for non-nested locks / allocate nested locks on the heap).
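//
// The futex lock packs its state into lk.poll: the owner is recorded as
// ( gtid + 1 ) << 1, and bit 0 is set when at least one waiter may be asleep
// in futex_wait, so the releasing thread knows it must issue a futex_wake.
//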
369
370static kmp_int32
371__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
372{
373 return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
374}
375
376static inline bool
377__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
378{
379 return lck->lk.depth_locked != -1;
380}
381
382__forceinline static void
383__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
384{
385 kmp_int32 gtid_code = ( gtid + 1 ) << 1;
386
387 KMP_MB();
388
389#ifdef USE_LOCK_PROFILE
390 kmp_uint32 curr = TCR_4( lck->lk.poll );
391 if ( ( curr != 0 ) && ( curr != gtid_code ) )
392 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
393 /* else __kmp_printf( "." );*/
394#endif /* USE_LOCK_PROFILE */
395
396 KMP_FSYNC_PREPARE( lck );
397 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
398 lck, lck->lk.poll, gtid ) );
399
400 kmp_int32 poll_val;
401 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
402 gtid_code ) ) != 0 ) {
403 kmp_int32 cond = poll_val & 1;
404 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
405 lck, gtid, poll_val, cond ) );
406
407 //
408 // NOTE: if you try to use the following condition for this branch
409 //
410 // if ( poll_val & 1 == 0 )
411 //
412 // Then the 12.0 compiler has a bug where the following block will
413 // always be skipped, regardless of the value of the LSB of poll_val.
414 //
415 if ( ! cond ) {
416 //
417 // Try to set the lsb in the poll to indicate to the owner
418 // thread that they need to wake this thread up.
419 //
420 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
421 poll_val, poll_val | 1 ) ) {
422 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
423 lck, lck->lk.poll, gtid ) );
424 continue;
425 }
426 poll_val |= 1;
427
428 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
429 lck, lck->lk.poll, gtid ) );
430 }
431
432 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
433 lck, gtid, poll_val ) );
434
435 kmp_int32 rc;
436 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
437 poll_val, NULL, NULL, 0 ) ) != 0 ) {
438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
439 lck, gtid, poll_val, rc, errno ) );
440 continue;
441 }
442
443 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
444 lck, gtid, poll_val ) );
445 //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
448 // wake call when releasing the lock, as we have no idea how many
449 // other threads are in the queue.
450 //
451 gtid_code |= 1;
452 }
453
454 KMP_FSYNC_ACQUIRED( lck );
455 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
456 lck, lck->lk.poll, gtid ) );
457}
458
459void
460__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
461{
462 __kmp_acquire_futex_lock_timed_template( lck, gtid );
463}
464
465static void
466__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
467{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
476 __kmp_acquire_futex_lock( lck, gtid );
477}
478
479int
480__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
481{
482 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
483 KMP_FSYNC_ACQUIRED( lck );
484 return TRUE;
485 }
486 return FALSE;
487}
488
489static int
490__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
491{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
497 return __kmp_test_futex_lock( lck, gtid );
498}
499
500void
501__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502{
503 KMP_MB(); /* Flush all pending memory write invalidates. */
504
505 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
506 lck, lck->lk.poll, gtid ) );
507
508 KMP_FSYNC_RELEASING(lck);
509
510 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
511
512 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
513 lck, gtid, poll_val ) );
514
515 if ( poll_val & 1 ) {
516 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
517 lck, gtid ) );
518 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
519 }
520
521 KMP_MB(); /* Flush all pending memory write invalidates. */
522
523 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
524 lck, lck->lk.poll, gtid ) );
525
526 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
527 __kmp_xproc ) );
528}
529
530static void
531__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
532{
    char const * const func = "omp_unset_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
546 __kmp_release_futex_lock( lck, gtid );
547}
548
549void
550__kmp_init_futex_lock( kmp_futex_lock_t * lck )
551{
552 TCW_4( lck->lk.poll, 0 );
553}
554
555static void
556__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
557{
558 __kmp_init_futex_lock( lck );
559}
560
561void
562__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
563{
564 lck->lk.poll = 0;
565}
566
567static void
568__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
569{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
578 __kmp_destroy_futex_lock( lck );
579}
580
581
582//
583// nested futex locks
584//
585
586void
587__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
588{
589 KMP_DEBUG_ASSERT( gtid >= 0 );
590
591 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
592 lck->lk.depth_locked += 1;
593 }
594 else {
595 __kmp_acquire_futex_lock_timed_template( lck, gtid );
596 lck->lk.depth_locked = 1;
597 }
598}
599
600static void
601__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
602{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
607 __kmp_acquire_nested_futex_lock( lck, gtid );
608}
609
610int
611__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
612{
613 int retval;
614
615 KMP_DEBUG_ASSERT( gtid >= 0 );
616
617 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
618 retval = ++lck->lk.depth_locked;
619 }
620 else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
621 retval = 0;
622 }
623 else {
624 KMP_MB();
625 retval = lck->lk.depth_locked = 1;
626 }
627 return retval;
628}
629
630static int
631__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
632{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
637 return __kmp_test_nested_futex_lock( lck, gtid );
638}
639
640void
641__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
642{
643 KMP_DEBUG_ASSERT( gtid >= 0 );
644
645 KMP_MB();
646 if ( --(lck->lk.depth_locked) == 0 ) {
647 __kmp_release_futex_lock( lck, gtid );
648 }
649}
650
651static void
652__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
665 __kmp_release_nested_futex_lock( lck, gtid );
666}
667
668void
669__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
670{
671 __kmp_init_futex_lock( lck );
672 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
673}
674
675static void
676__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
677{
678 __kmp_init_nested_futex_lock( lck );
679}
680
681void
682__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
683{
684 __kmp_destroy_futex_lock( lck );
685 lck->lk.depth_locked = 0;
686}
687
688static void
689__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
690{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
698 __kmp_destroy_nested_futex_lock( lck );
699}
700
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

703
704/* ------------------------------------------------------------------------ */
705/* ticket (bakery) locks */
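//
// A ticket (bakery) lock hands out monotonically increasing tickets from
// lk.next_ticket; the thread whose ticket equals lk.now_serving owns the
// lock, and releasing simply advances now_serving.  Correct unsigned
// wrap-around of the ticket counters is what __kmp_validate_locks() checks.
//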
706
707static kmp_int32
708__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
709{
710 return TCR_4( lck->lk.owner_id ) - 1;
711}
712
713static inline bool
714__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
715{
716 return lck->lk.depth_locked != -1;
717}
718
719static kmp_uint32
720__kmp_bakery_check(kmp_uint value, kmp_uint checker)
721{
722 register kmp_uint32 pause;
723
724 if (value == checker) {
725 return TRUE;
726 }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
729}
730
731__forceinline static void
732__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
733{
734 kmp_uint32 my_ticket;
735 KMP_MB();
736
737 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
738
739#ifdef USE_LOCK_PROFILE
740 if ( TCR_4( lck->lk.now_serving ) != my_ticket )
741 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
742 /* else __kmp_printf( "." );*/
743#endif /* USE_LOCK_PROFILE */
744
745 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
746 KMP_FSYNC_ACQUIRED(lck);
747 return;
748 }
749 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
750 KMP_FSYNC_ACQUIRED(lck);
751}
752
753void
754__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
755{
756 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
757}
758
759static void
760__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
761{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}
777
778int
779__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
780{
781 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
782 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
783 kmp_uint32 next_ticket = my_ticket + 1;
784 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
785 my_ticket, next_ticket ) ) {
786 KMP_FSYNC_ACQUIRED( lck );
787 return TRUE;
788 }
789 }
790 return FALSE;
791}
792
793static int
794__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
795{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
808 }
809 return retval;
810}
811
812void
813__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814{
815 kmp_uint32 distance;
816
817 KMP_MB(); /* Flush all pending memory write invalidates. */
818
819 KMP_FSYNC_RELEASING(lck);
820 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
821
822 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
823
824 KMP_MB(); /* Flush all pending memory write invalidates. */
825
826 KMP_YIELD( distance
827 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
828}
829
830static void
831__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
832{
    char const * const func = "omp_unset_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_ticket_lock( lck, gtid );
850}
851
852void
853__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
854{
855 lck->lk.location = NULL;
856 TCW_4( lck->lk.next_ticket, 0 );
857 TCW_4( lck->lk.now_serving, 0 );
858 lck->lk.owner_id = 0; // no thread owns the lock.
859 lck->lk.depth_locked = -1; // -1 => not a nested lock.
860 lck->lk.initialized = (kmp_ticket_lock *)lck;
861}
862
863static void
864__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
865{
866 __kmp_init_ticket_lock( lck );
867}
868
869void
870__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
871{
872 lck->lk.initialized = NULL;
873 lck->lk.location = NULL;
874 lck->lk.next_ticket = 0;
875 lck->lk.now_serving = 0;
876 lck->lk.owner_id = 0;
877 lck->lk.depth_locked = -1;
878}
879
880static void
881__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
882{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
893 __kmp_destroy_ticket_lock( lck );
894}
895
896
897//
898// nested ticket locks
899//
900
901void
902__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
903{
904 KMP_DEBUG_ASSERT( gtid >= 0 );
905
906 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
907 lck->lk.depth_locked += 1;
908 }
909 else {
910 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
911 KMP_MB();
912 lck->lk.depth_locked = 1;
913 KMP_MB();
914 lck->lk.owner_id = gtid + 1;
915 }
916}
917
918static void
919__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
920{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
928 __kmp_acquire_nested_ticket_lock( lck, gtid );
929}
930
931int
932__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
933{
934 int retval;
935
936 KMP_DEBUG_ASSERT( gtid >= 0 );
937
938 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
939 retval = ++lck->lk.depth_locked;
940 }
941 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
942 retval = 0;
943 }
944 else {
945 KMP_MB();
946 retval = lck->lk.depth_locked = 1;
947 KMP_MB();
948 lck->lk.owner_id = gtid + 1;
949 }
950 return retval;
951}
952
953static int
954__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
955 kmp_int32 gtid )
956{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
964 return __kmp_test_nested_ticket_lock( lck, gtid );
965}
966
967void
968__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
969{
970 KMP_DEBUG_ASSERT( gtid >= 0 );
971
972 KMP_MB();
973 if ( --(lck->lk.depth_locked) == 0 ) {
974 KMP_MB();
975 lck->lk.owner_id = 0;
976 __kmp_release_ticket_lock( lck, gtid );
977 }
978}
979
980static void
981__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
982{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
997 __kmp_release_nested_ticket_lock( lck, gtid );
998}
999
1000void
1001__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1002{
1003 __kmp_init_ticket_lock( lck );
1004 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1005}
1006
1007static void
1008__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1009{
1010 __kmp_init_nested_ticket_lock( lck );
1011}
1012
1013void
1014__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1015{
1016 __kmp_destroy_ticket_lock( lck );
1017 lck->lk.depth_locked = 0;
1018}
1019
1020static void
1021__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1022{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
1033 __kmp_destroy_nested_ticket_lock( lck );
1034}
1035
1036
1037//
1038// access functions to fields which don't exist for all lock kinds.
1039//
1040
1041static int
1042__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1043{
1044 return lck == lck->lk.initialized;
1045}
1046
1047static const ident_t *
1048__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1049{
1050 return lck->lk.location;
1051}
1052
1053static void
1054__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1055{
1056 lck->lk.location = loc;
1057}
1058
1059static kmp_lock_flags_t
1060__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1061{
1062 return lck->lk.flags;
1063}
1064
1065static void
1066__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1067{
1068 lck->lk.flags = flags;
1069}
1070
1071/* ------------------------------------------------------------------------ */
1072/* queuing locks */
1073
1074/*
1075 * First the states
1076 * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1077 * UINT_MAX or -1, 0 means lock is held, nobody on queue
1078 * h, h means lock is held or about to transition, 1 element on queue
1079 * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1080 *
1081 * Now the transitions
1082 * Acquire(0,0) = -1 ,0
1083 * Release(0,0) = Error
1084 * Acquire(-1,0) = h ,h h > 0
1085 * Release(-1,0) = 0 ,0
1086 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1087 * Release(h,h) = -1 ,0 h > 0
1088 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1089 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1090 *
1091 * And pictorially
1092 *
1093 *
1094 * +-----+
1095 * | 0, 0|------- release -------> Error
1096 * +-----+
1097 * | ^
1098 * acquire| |release
1099 * | |
1100 * | |
1101 * v |
1102 * +-----+
1103 * |-1, 0|
1104 * +-----+
1105 * | ^
1106 * acquire| |release
1107 * | |
1108 * | |
1109 * v |
1110 * +-----+
1111 * | h, h|
1112 * +-----+
1113 * | ^
1114 * acquire| |release
1115 * | |
1116 * | |
1117 * v |
1118 * +-----+
1119 * | h, t|----- acquire, release loopback ---+
1120 * +-----+ |
1121 * ^ |
1122 * | |
1123 * +------------------------------------+
1124 *
1125 */
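/* head_id and tail_id store gtid + 1 for waiting threads (0 means "no
   thread"), which is what lets the 64-bit compare-and-swap of the packed
   (head, tail) pair below distinguish an empty queue from queued waiters. */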
1126
1127#ifdef DEBUG_QUEUING_LOCKS
1128
1129/* Stuff for circular trace buffer */
1130#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1132static int tc = 0;
1133#define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1134#define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1135#define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1136
1137static void
1138__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1139 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1140{
1141 kmp_int32 t, i;
1142
1143 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1144
1145 i = tc % TRACE_BUF_ELE;
1146 __kmp_printf_no_lock( "%s\n", traces[i] );
1147 i = (i+1) % TRACE_BUF_ELE;
1148 while ( i != (tc % TRACE_BUF_ELE) ) {
1149 __kmp_printf_no_lock( "%s", traces[i] );
1150 i = (i+1) % TRACE_BUF_ELE;
1151 }
1152 __kmp_printf_no_lock( "\n" );
1153
1154 __kmp_printf_no_lock(
1155 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1156 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1157 head_id, tail_id );
1158
1159 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1160
1161 if ( lck->lk.head_id >= 1 ) {
1162 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1163 while (t > 0) {
1164 __kmp_printf_no_lock( "-> %d ", t );
1165 t = __kmp_threads[t-1]->th.th_next_waiting;
1166 }
1167 }
1168 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1169 __kmp_printf_no_lock( "\n\n" );
1170}
1171
1172#endif /* DEBUG_QUEUING_LOCKS */
1173
1174static kmp_int32
1175__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1176{
1177 return TCR_4( lck->lk.owner_id ) - 1;
1178}
1179
1180static inline bool
1181__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1182{
1183 return lck->lk.depth_locked != -1;
1184}
1185
/* Acquire a lock using the queuing lock implementation */
1187template <bool takeTime>
1188/* [TLW] The unused template above is left behind because of what BEB believes is a
1189 potential compiler problem with __forceinline. */
1190__forceinline static void
1191__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1192 kmp_int32 gtid )
1193{
1194 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1195 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1196 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1197 volatile kmp_uint32 *spin_here_p;
1198 kmp_int32 need_mf = 1;
1199
1200 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1201
1202 KMP_FSYNC_PREPARE( lck );
1203 KMP_DEBUG_ASSERT( this_thr != NULL );
1204 spin_here_p = & this_thr->th.th_spin_here;
1205
1206#ifdef DEBUG_QUEUING_LOCKS
1207 TRACE_LOCK( gtid+1, "acq ent" );
1208 if ( *spin_here_p )
1209 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1210 if ( this_thr->th.th_next_waiting != 0 )
1211 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1212#endif
1213 KMP_DEBUG_ASSERT( !*spin_here_p );
1214 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1215
1216
1217 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1218 that may follow, not just in execution order, but also in visibility order. This way,
1219 when a releasing thread observes the changes to the queue by this thread, it can
1220 rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1221 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1222 to FALSE before this thread sets it to TRUE, this thread will hang.
1223 */
1224 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1225
1226 while( 1 ) {
1227 kmp_int32 enqueued;
1228 kmp_int32 head;
1229 kmp_int32 tail;
1230
1231 head = *head_id_p;
1232
1233 switch ( head ) {
1234
1235 case -1:
1236 {
1237#ifdef DEBUG_QUEUING_LOCKS
1238 tail = *tail_id_p;
1239 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1240#endif
1241 tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1242 this assignment prevents us from entering the if ( t > 0 )
1243 condition in the enqueued case below, which is not necessary for
1244 this state transition */
1245
1246 need_mf = 0;
1247 /* try (-1,0)->(tid,tid) */
1248 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1249 KMP_PACK_64( -1, 0 ),
1250 KMP_PACK_64( gtid+1, gtid+1 ) );
1251#ifdef DEBUG_QUEUING_LOCKS
1252 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1253#endif
1254 }
1255 break;
1256
1257 default:
1258 {
1259 tail = *tail_id_p;
1260 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1261
1262#ifdef DEBUG_QUEUING_LOCKS
1263 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1264#endif
1265
1266 if ( tail == 0 ) {
1267 enqueued = FALSE;
1268 }
1269 else {
1270 need_mf = 0;
1271 /* try (h,t) or (h,h)->(h,tid) */
1272 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1273
1274#ifdef DEBUG_QUEUING_LOCKS
1275 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1276#endif
1277 }
1278 }
1279 break;
1280
1281 case 0: /* empty queue */
1282 {
1283 kmp_int32 grabbed_lock;
1284
1285#ifdef DEBUG_QUEUING_LOCKS
1286 tail = *tail_id_p;
1287 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1288#endif
1289 /* try (0,0)->(-1,0) */
1290
1291 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1292 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1293
1294 if ( grabbed_lock ) {
1295
1296 *spin_here_p = FALSE;
1297
1298 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1299 lck, gtid ));
1300#ifdef DEBUG_QUEUING_LOCKS
1301 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1302#endif
1303 KMP_FSYNC_ACQUIRED( lck );
1304 return; /* lock holder cannot be on queue */
1305 }
1306 enqueued = FALSE;
1307 }
1308 break;
1309 }
1310
1311 if ( enqueued ) {
1312 if ( tail > 0 ) {
1313 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1314 KMP_ASSERT( tail_thr != NULL );
1315 tail_thr->th.th_next_waiting = gtid+1;
1316 /* corresponding wait for this write in release code */
1317 }
1318 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1319
1320
1321 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1322 * throughput only here.
1323 */
1324 KMP_MB();
1325 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1326
1327#ifdef DEBUG_QUEUING_LOCKS
1328 TRACE_LOCK( gtid+1, "acq spin" );
1329
1330 if ( this_thr->th.th_next_waiting != 0 )
1331 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1332#endif
1333 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1334 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1335 lck, gtid ));
1336
1337#ifdef DEBUG_QUEUING_LOCKS
1338 TRACE_LOCK( gtid+1, "acq exit 2" );
1339#endif
1340 /* got lock, we were dequeued by the thread that released lock */
1341 return;
1342 }
1343
1344 /* Yield if number of threads > number of logical processors */
1345 /* ToDo: Not sure why this should only be in oversubscription case,
1346 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1347 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1348 __kmp_xproc ) );
1349#ifdef DEBUG_QUEUING_LOCKS
1350 TRACE_LOCK( gtid+1, "acq retry" );
1351#endif
1352
1353 }
1354 KMP_ASSERT2( 0, "should not get here" );
1355}
1356
1357void
1358__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1359{
1360 KMP_DEBUG_ASSERT( gtid >= 0 );
1361
1362 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1363}
1364
1365static void
1366__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1367 kmp_int32 gtid )
1368{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}
1384
1385int
1386__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1387{
1388 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1389 kmp_int32 head;
1390#ifdef KMP_DEBUG
1391 kmp_info_t *this_thr;
1392#endif
1393
1394 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1395 KMP_DEBUG_ASSERT( gtid >= 0 );
1396#ifdef KMP_DEBUG
1397 this_thr = __kmp_thread_from_gtid( gtid );
1398 KMP_DEBUG_ASSERT( this_thr != NULL );
1399 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1400#endif
1401
1402 head = *head_id_p;
1403
1404 if ( head == 0 ) { /* nobody on queue, nobody holding */
1405
1406 /* try (0,0)->(-1,0) */
1407
1408 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1409 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1410 KMP_FSYNC_ACQUIRED(lck);
1411 return TRUE;
1412 }
1413 }
1414
1415 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1416 return FALSE;
1417}
1418
1419static int
1420__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1421{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
1434 }
1435 return retval;
1436}
1437
1438void
1439__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1440{
1441 register kmp_info_t *this_thr;
1442 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1443 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1444
1445 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1446 KMP_DEBUG_ASSERT( gtid >= 0 );
1447 this_thr = __kmp_thread_from_gtid( gtid );
1448 KMP_DEBUG_ASSERT( this_thr != NULL );
1449#ifdef DEBUG_QUEUING_LOCKS
1450 TRACE_LOCK( gtid+1, "rel ent" );
1451
1452 if ( this_thr->th.th_spin_here )
1453 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1454 if ( this_thr->th.th_next_waiting != 0 )
1455 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1456#endif
1457 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1458 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1459
1460 KMP_FSYNC_RELEASING(lck);
1461
1462 while( 1 ) {
1463 kmp_int32 dequeued;
1464 kmp_int32 head;
1465 kmp_int32 tail;
1466
1467 head = *head_id_p;
1468
1469#ifdef DEBUG_QUEUING_LOCKS
1470 tail = *tail_id_p;
1471 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1472 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1473#endif
1474 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1475
1476 if ( head == -1 ) { /* nobody on queue */
1477
1478 /* try (-1,0)->(0,0) */
1479 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1480 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1481 lck, gtid ));
1482#ifdef DEBUG_QUEUING_LOCKS
1483 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1484#endif
1485 return;
1486 }
1487 dequeued = FALSE;
1488
1489 }
1490 else {
1491
1492 tail = *tail_id_p;
1493 if ( head == tail ) { /* only one thread on the queue */
1494
1495#ifdef DEBUG_QUEUING_LOCKS
1496 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1497#endif
1498 KMP_DEBUG_ASSERT( head > 0 );
1499
1500 /* try (h,h)->(-1,0) */
1501 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1502 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1503#ifdef DEBUG_QUEUING_LOCKS
1504 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1505#endif
1506
1507 }
1508 else {
1509 volatile kmp_int32 *waiting_id_p;
1510 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1511 KMP_DEBUG_ASSERT( head_thr != NULL );
1512 waiting_id_p = & head_thr->th.th_next_waiting;
1513
1514 /* Does this require synchronous reads? */
1515#ifdef DEBUG_QUEUING_LOCKS
1516 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1517#endif
1518 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1519
1520 /* try (h,t)->(h',t) or (t,t) */
1521
1522 KMP_MB();
1523 /* make sure enqueuing thread has time to update next waiting thread field */
1524 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1525#ifdef DEBUG_QUEUING_LOCKS
1526 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1527#endif
1528 dequeued = TRUE;
1529 }
1530 }
1531
1532 if ( dequeued ) {
1533 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1534 KMP_DEBUG_ASSERT( head_thr != NULL );
1535
1536 /* Does this require synchronous reads? */
1537#ifdef DEBUG_QUEUING_LOCKS
1538 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1539#endif
1540 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1541
1542 /* For clean code only.
1543 * Thread not released until next statement prevents race with acquire code.
1544 */
1545 head_thr->th.th_next_waiting = 0;
1546#ifdef DEBUG_QUEUING_LOCKS
1547 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1548#endif
1549
1550 KMP_MB();
1551 /* reset spin value */
1552 head_thr->th.th_spin_here = FALSE;
1553
1554 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1555 lck, gtid ));
1556#ifdef DEBUG_QUEUING_LOCKS
1557 TRACE_LOCK( gtid+1, "rel exit 2" );
1558#endif
1559 return;
1560 }
1561 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1562
1563#ifdef DEBUG_QUEUING_LOCKS
1564 TRACE_LOCK( gtid+1, "rel retry" );
1565#endif
1566
1567 } /* while */
1568 KMP_ASSERT2( 0, "should not get here" );
1569}
1570
1571static void
1572__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1573 kmp_int32 gtid )
1574{
    char const * const func = "omp_unset_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_queuing_lock( lck, gtid );
1591}
1592
1593void
1594__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1595{
1596 lck->lk.location = NULL;
1597 lck->lk.head_id = 0;
1598 lck->lk.tail_id = 0;
1599 lck->lk.next_ticket = 0;
1600 lck->lk.now_serving = 0;
1601 lck->lk.owner_id = 0; // no thread owns the lock.
1602 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1603 lck->lk.initialized = lck;
1604
1605 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1606}
1607
1608static void
1609__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1610{
1611 __kmp_init_queuing_lock( lck );
1612}
1613
1614void
1615__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1616{
1617 lck->lk.initialized = NULL;
1618 lck->lk.location = NULL;
1619 lck->lk.head_id = 0;
1620 lck->lk.tail_id = 0;
1621 lck->lk.next_ticket = 0;
1622 lck->lk.now_serving = 0;
1623 lck->lk.owner_id = 0;
1624 lck->lk.depth_locked = -1;
1625}
1626
1627static void
1628__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1629{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
1640 __kmp_destroy_queuing_lock( lck );
1641}
1642
1643
1644//
1645// nested queuing locks
1646//
1647
1648void
1649__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1650{
1651 KMP_DEBUG_ASSERT( gtid >= 0 );
1652
1653 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1654 lck->lk.depth_locked += 1;
1655 }
1656 else {
1657 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1658 KMP_MB();
1659 lck->lk.depth_locked = 1;
1660 KMP_MB();
1661 lck->lk.owner_id = gtid + 1;
1662 }
1663}
1664
1665static void
1666__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1667{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
1675 __kmp_acquire_nested_queuing_lock( lck, gtid );
1676}
1677
1678int
1679__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1680{
1681 int retval;
1682
1683 KMP_DEBUG_ASSERT( gtid >= 0 );
1684
1685 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1686 retval = ++lck->lk.depth_locked;
1687 }
1688 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1689 retval = 0;
1690 }
1691 else {
1692 KMP_MB();
1693 retval = lck->lk.depth_locked = 1;
1694 KMP_MB();
1695 lck->lk.owner_id = gtid + 1;
1696 }
1697 return retval;
1698}
1699
1700static int
1701__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1702 kmp_int32 gtid )
1703{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
1711 return __kmp_test_nested_queuing_lock( lck, gtid );
1712}
1713
1714void
1715__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716{
1717 KMP_DEBUG_ASSERT( gtid >= 0 );
1718
1719 KMP_MB();
1720 if ( --(lck->lk.depth_locked) == 0 ) {
1721 KMP_MB();
1722 lck->lk.owner_id = 0;
1723 __kmp_release_queuing_lock( lck, gtid );
1724 }
1725}
1726
1727static void
1728__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1729{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB(); /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
1744 __kmp_release_nested_queuing_lock( lck, gtid );
1745}
1746
1747void
1748__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1749{
1750 __kmp_init_queuing_lock( lck );
1751 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1752}
1753
1754static void
1755__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1756{
1757 __kmp_init_nested_queuing_lock( lck );
1758}
1759
1760void
1761__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1762{
1763 __kmp_destroy_queuing_lock( lck );
1764 lck->lk.depth_locked = 0;
1765}
1766
1767static void
1768__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1769{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
1780 __kmp_destroy_nested_queuing_lock( lck );
1781}
1782
1783
1784//
1785// access functions to fields which don't exist for all lock kinds.
1786//
1787
1788static int
1789__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1790{
1791 return lck == lck->lk.initialized;
1792}
1793
1794static const ident_t *
1795__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1796{
1797 return lck->lk.location;
1798}
1799
1800static void
1801__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1802{
1803 lck->lk.location = loc;
1804}
1805
1806static kmp_lock_flags_t
1807__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1808{
1809 return lck->lk.flags;
1810}
1811
1812static void
1813__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1814{
1815 lck->lk.flags = flags;
1816}
1817
1818#if KMP_USE_ADAPTIVE_LOCKS
1819
1820/*
1821 RTM Adaptive locks
1822*/
1823
1824// TODO: Use the header for intrinsics below with the compiler 13.0
1825//#include <immintrin.h>
1826
1827// Values from the status register after failed speculation.
1828#define _XBEGIN_STARTED (~0u)
1829#define _XABORT_EXPLICIT (1 << 0)
1830#define _XABORT_RETRY (1 << 1)
1831#define _XABORT_CONFLICT (1 << 2)
1832#define _XABORT_CAPACITY (1 << 3)
1833#define _XABORT_DEBUG (1 << 4)
1834#define _XABORT_NESTED (1 << 5)
1835#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1836
1837// Aborts for which it's worth trying again immediately
1838#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
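// Illustrative note (not part of the original source): after an explicit
// _xabort(0x01), the returned status word has _XABORT_EXPLICIT set and
// _XABORT_CODE(status) == 0x01, so (status & SOFT_ABORT_MASK) != 0 and the
// abort is treated as one worth retrying immediately.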
1839
1840#define STRINGIZE_INTERNAL(arg) #arg
1841#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1842
1843// Access to RTM instructions
1844
1845/*
1846 A version of XBegin which returns -1 on successfully starting speculation, and the value of EAX on an abort.
1847 This is the same definition as the compiler intrinsic that will be supported at some point.
1848*/
1849static __inline int _xbegin()
1850{
1851 int res = -1;
1852
1853#if KMP_OS_WINDOWS
1854#if KMP_ARCH_X86_64
1855 _asm {
1856 _emit 0xC7
1857 _emit 0xF8
1858 _emit 2
1859 _emit 0
1860 _emit 0
1861 _emit 0
1862 jmp L2
1863 mov res, eax
1864 L2:
1865 }
1866#else /* IA32 */
1867 _asm {
1868 _emit 0xC7
1869 _emit 0xF8
1870 _emit 2
1871 _emit 0
1872 _emit 0
1873 _emit 0
1874 jmp L2
1875 mov res, eax
1876 L2:
1877 }
1878#endif // KMP_ARCH_X86_64
1879#else
1880 /* Note that %eax must be noted as killed (clobbered), because
1881 * the XSR is returned in %eax(%rax) on abort. Other register
1882 * values are restored, so don't need to be killed.
1883 *
1884 * We must also mark 'res' as an input and an output, since otherwise
1885 * 'res=-1' may be dropped as being dead, whereas we do need the
1886 * assignment on the successful (i.e., non-abort) path.
1887 */
1888 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1889 " .long 1f-1b-6\n"
1890 " jmp 2f\n"
1891 "1: movl %%eax,%0\n"
1892 "2:"
1893 :"+r"(res)::"memory","%eax");
1894#endif // KMP_OS_WINDOWS
1895 return res;
1896}
1897
1898/*
1899 Transaction end
1900*/
1901static __inline void _xend()
1902{
1903#if KMP_OS_WINDOWS
1904 __asm {
1905 _emit 0x0f
1906 _emit 0x01
1907 _emit 0xd5
1908 }
1909#else
1910 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1911#endif
1912}
1913
1914/*
1915 This is a macro; the argument must be a single byte constant which
1916 can be evaluated by the inline assembler, since it is emitted as a
1917 byte into the assembly code.
1918*/
1919#if KMP_OS_WINDOWS
1920#define _xabort(ARG) \
1921 _asm _emit 0xc6 \
1922 _asm _emit 0xf8 \
1923 _asm _emit ARG
1924#else
1925#define _xabort(ARG) \
1926 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1927#endif
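/*
    Illustrative usage sketch for the three wrappers above (a hedged example,
    not part of the original source); the adaptive lock code below follows
    exactly this pattern:

        kmp_uint32 status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            // ... speculative critical section ...
            _xend();                        // commit the transaction
        }
        else {
            // Aborted: 'status' holds the abort reason bits defined above.
        }
*/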
1928
1929//
1930// Statistics are collected for testing purposes
1931//
1932#if KMP_DEBUG_ADAPTIVE_LOCKS
1933
1934// We accumulate speculative lock statistics when the lock is destroyed.
1935// We keep locks that haven't been destroyed in the liveLocks list
1936// so that we can grab their statistics too.
1937static kmp_adaptive_lock_statistics_t destroyedStats;
1938
1939// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941
1942// A lock so we can safely update the list of locks.
1943static kmp_bootstrap_lock_t chain_lock;
1944
1945// Initialize the list of stats.
1946void
1947__kmp_init_speculative_stats()
1948{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950
1951 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1952 lck->stats.next = lck;
1953 lck->stats.prev = lck;
1954
1955 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1956 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1957
1958 __kmp_init_bootstrap_lock( &chain_lock );
1959
1960}
1961
1962// Insert the lock into the circular list
1963static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001964__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965{
1966 __kmp_acquire_bootstrap_lock( &chain_lock );
1967
1968 lck->stats.next = liveLocks.stats.next;
1969 lck->stats.prev = &liveLocks;
1970
1971 liveLocks.stats.next = lck;
1972 lck->stats.next->stats.prev = lck;
1973
1974 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1975 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1976
1977 __kmp_release_bootstrap_lock( &chain_lock );
1978}
1979
1980static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001981__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982{
1983 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1984 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1985
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001986 kmp_adaptive_lock_info_t * n = lck->stats.next;
1987 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988
1989 n->stats.prev = p;
1990 p->stats.next = n;
1991}
1992
1993static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001994__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995{
1996 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
1997 __kmp_remember_lock( lck );
1998}
1999
2000static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002001__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002{
2003 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2004
2005 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2006 t->successfulSpeculations += s->successfulSpeculations;
2007 t->hardFailedSpeculations += s->hardFailedSpeculations;
2008 t->softFailedSpeculations += s->softFailedSpeculations;
2009 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2010 t->lemmingYields += s->lemmingYields;
2011}
2012
2013static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002014__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015{
2016 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2017
2018 __kmp_acquire_bootstrap_lock( &chain_lock );
2019
2020 __kmp_add_stats( &destroyedStats, lck );
2021 __kmp_forget_lock( lck );
2022
2023 __kmp_release_bootstrap_lock( &chain_lock );
2024}
2025
2026static float
2027percent (kmp_uint32 count, kmp_uint32 total)
2028{
2029 return (total == 0) ? 0.0: (100.0 * count)/total;
2030}
2031
2032static
2033FILE * __kmp_open_stats_file()
2034{
2035 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2036 return stdout;
2037
2038 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2039 char buffer[buffLen];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2041 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042 FILE * result = fopen(&buffer[0], "w");
2043
2044 // Maybe we should issue a warning here...
2045 return result ? result : stdout;
2046}
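// For example (illustrative): if __kmp_speculative_statsfile is "adaptive.%d",
// a process with pid 1234 writes its statistics to "adaptive.1234", while the
// special value "-" sends them to stdout.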
2047
2048void
2049__kmp_print_speculative_stats()
2050{
2051 if (__kmp_user_lock_kind != lk_adaptive)
2052 return;
2053
2054 FILE * statsFile = __kmp_open_stats_file();
2055
2056 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002058
2059 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2060 __kmp_add_stats( &total, lck );
2061 }
2062 kmp_adaptive_lock_statistics_t *t = &total;
2063 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2064 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2065 t->softFailedSpeculations;
2066
2067 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2068 fprintf ( statsFile, " Lock parameters: \n"
2069 " max_soft_retries : %10d\n"
2070 " max_badness : %10d\n",
2071 __kmp_adaptive_backoff_params.max_soft_retries,
2072 __kmp_adaptive_backoff_params.max_badness);
2073 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2074 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2075 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2076 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2077 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2078 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2079 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2080
2081 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2082 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2083 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2084 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2085 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2086 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2087 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2088
2089 if (statsFile != stdout)
2090 fclose( statsFile );
2091}
2092
2093# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2094#else
2095# define KMP_INC_STAT(lck,stat)
2096
2097#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2098
2099static inline bool
2100__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2101{
2102 // It is enough to check that the head_id is zero.
2103 // We don't also need to check the tail.
2104 bool res = lck->lk.head_id == 0;
2105
2106 // We need a fence here, since we must ensure that no memory operations
2107 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002108#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002110#else
2111 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112#endif
2113
2114 return res;
2115}
2116
2117// Functions for manipulating the badness
2118static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002119__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120{
2121 // Reset the badness to zero so we eagerly try to speculate again
2122 lck->lk.adaptive.badness = 0;
2123 KMP_INC_STAT(lck,successfulSpeculations);
2124}
2125
2126// Create a bit mask with one more set bit.
2127static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002128__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129{
2130 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2131 if ( newBadness > lck->lk.adaptive.max_badness) {
2132 return;
2133 } else {
2134 lck->lk.adaptive.badness = newBadness;
2135 }
2136}
2137
2138// Check whether speculation should be attempted.
2139static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002140__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141{
2142 kmp_uint32 badness = lck->lk.adaptive.badness;
2143 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2144 int res = (attempts & badness) == 0;
2145 return res;
2146}
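// Worked example (illustrative, not part of the original source): three
// consecutive failed speculative acquire attempts step the badness mask
// 0 -> 1 -> 3 -> 7, after which __kmp_should_speculate() returns true only
// when the low three bits of acquire_attempts are zero, i.e. roughly on every
// 8th acquire attempt; a single successful speculation resets badness to 0.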
2147
2148// Attempt to acquire only the speculative lock.
2149// Does not back off to the non-speculative lock.
2150//
2151static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153{
2154 int retries = lck->lk.adaptive.max_soft_retries;
2155
2156 // We don't explicitly count the start of speculation, rather we record
2157 // the results (success, hard fail, soft fail). The sum of all of those
2158 // is the total number of times we started speculation since all
2159 // speculations must end one of those ways.
2160 do
2161 {
2162 kmp_uint32 status = _xbegin();
2163 // Switch this in to disable actual speculation but exercise
2164 // at least some of the rest of the code. Useful for debugging...
2165 // kmp_uint32 status = _XABORT_NESTED;
2166
2167 if (status == _XBEGIN_STARTED )
2168 { /* We have successfully started speculation
2169 * Check that no-one acquired the lock for real between when we last looked
2170 * and now. This also gets the lock cache line into our read-set,
2171 * which we need so that we'll abort if anyone later claims it for real.
2172 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174 {
2175 // Lock is now visibly acquired, so someone beat us to it.
2176 // Abort the transaction so we'll restart from _xbegin with the
2177 // failure status.
2178 _xabort(0x01)
2179 KMP_ASSERT2( 0, "should not get here" );
2180 }
2181 return 1; // Lock has been acquired (speculatively)
2182 } else {
2183 // We have aborted, update the statistics
2184 if ( status & SOFT_ABORT_MASK)
2185 {
2186 KMP_INC_STAT(lck,softFailedSpeculations);
2187 // and loop round to retry.
2188 }
2189 else
2190 {
2191 KMP_INC_STAT(lck,hardFailedSpeculations);
2192 // Give up if we had a hard failure.
2193 break;
2194 }
2195 }
2196 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2197
2198 // Either we had a hard failure or we didn't succeed softly after
2199 // the full set of attempts, so back off the badness.
2200 __kmp_step_badness( lck );
2201 return 0;
2202}
2203
2204// Attempt to acquire the speculative lock, or back off to the non-speculative one
2205// if the speculative lock cannot be acquired.
2206// We can succeed speculatively, non-speculatively, or fail.
2207static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002208__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209{
2210 // First try to acquire the lock speculatively
2211 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2212 return 1;
2213
2214 // Speculative acquisition failed, so try to acquire it non-speculatively.
2215 // Count the non-speculative acquire attempt
2216 lck->lk.adaptive.acquire_attempts++;
2217
2218 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002219 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 {
2221 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2222 return 1; // Lock is acquired (non-speculatively)
2223 }
2224 else
2225 {
2226 return 0; // Failed to acquire the lock, it's already visibly locked.
2227 }
2228}
2229
2230static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002231__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002232{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002233 char const * const func = "omp_test_lock";
2234 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2235 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002236 }
2237
2238 int retval = __kmp_test_adaptive_lock( lck, gtid );
2239
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002240 if ( retval ) {
2241 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002242 }
2243 return retval;
2244}
2245
2246// Block until we can acquire a speculative, adaptive lock.
2247// We check whether we should be trying to speculate.
2248// If we should be, we check the real lock to see if it is free,
2249// and, if not, pause without attempting to acquire it until it is.
2250// Then we try the speculative acquire.
2251// This means that although we suffer from lemmings a little (
2252// because we can't acquire the lock speculatively until
2253// the queue of threads waiting has cleared), we don't get into a
2254// state where we can never acquire the lock speculatively (because we
2255// force the queue to clear by preventing new arrivals from entering the
2256// queue).
2257// This does mean that when we're trying to break lemmings, the lock
2258// is no longer fair. However, OpenMP makes no guarantee that its
2259// locks are fair, so this isn't a real problem.
2260static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002261__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002262{
2263 if ( __kmp_should_speculate( lck, gtid ) )
2264 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002265 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266 {
2267 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2268 return;
2269 // We tried speculation and failed, so give up.
2270 }
2271 else
2272 {
2273 // We can't try speculation until the lock is free, so we
2274 // pause here (without suspending on the queueing lock,
2275            // to allow it to drain), then try again.
2276 // All other threads will also see the same result for
2277 // shouldSpeculate, so will be doing the same if they
2278 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002279 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 {
2281 KMP_INC_STAT(lck,lemmingYields);
2282 __kmp_yield (TRUE);
2283 }
2284
2285 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2286 return;
2287 }
2288 }
2289
2290 // Speculative acquisition failed, so acquire it non-speculatively.
2291 // Count the non-speculative acquire attempt
2292 lck->lk.adaptive.acquire_attempts++;
2293
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002294 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002295 // We have acquired the base lock, so count that.
2296 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2297}
2298
2299static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002300__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002302 char const * const func = "omp_set_lock";
2303 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2304 KMP_FATAL( LockIsUninitialized, func );
2305 }
2306 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2307 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308 }
2309
2310 __kmp_acquire_adaptive_lock( lck, gtid );
2311
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002312 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313}
2314
2315static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002316__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002317{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002318 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319 { // If the lock doesn't look claimed we must be speculating.
2320 // (Or the user's code is buggy and they're releasing without locking;
2321 // if we had XTEST we'd be able to check that case...)
2322 _xend(); // Exit speculation
2323 __kmp_update_badness_after_success( lck );
2324 }
2325 else
2326 { // Since the lock *is* visibly locked we're not speculating,
2327 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002328 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002329 }
2330}
2331
2332static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002333__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002335 char const * const func = "omp_unset_lock";
2336 KMP_MB(); /* in case another processor initialized lock */
2337 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2338 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002340 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2341 KMP_FATAL( LockUnsettingFree, func );
2342 }
2343 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2344 KMP_FATAL( LockUnsettingSetByAnother, func );
2345 }
2346 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347 __kmp_release_adaptive_lock( lck, gtid );
2348}
2349
2350static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002351__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002353 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 lck->lk.adaptive.badness = 0;
2355 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2356 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2357 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2358#if KMP_DEBUG_ADAPTIVE_LOCKS
2359 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2360#endif
2361 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2362}
2363
2364static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002365__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002366{
2367 __kmp_init_adaptive_lock( lck );
2368}
2369
2370static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372{
2373#if KMP_DEBUG_ADAPTIVE_LOCKS
2374 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2375#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 // Nothing needed for the speculative part.
2378}
2379
2380static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002381__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002382{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002383 char const * const func = "omp_destroy_lock";
2384 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2385 KMP_FATAL( LockIsUninitialized, func );
2386 }
2387 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2388 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002389 }
2390 __kmp_destroy_adaptive_lock( lck );
2391}
2392
2393
2394#endif // KMP_USE_ADAPTIVE_LOCKS
2395
2396
2397/* ------------------------------------------------------------------------ */
2398/* DRDPA ticket locks */
2399/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2400
2401static kmp_int32
2402__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2403{
2404 return TCR_4( lck->lk.owner_id ) - 1;
2405}
2406
2407static inline bool
2408__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2409{
2410 return lck->lk.depth_locked != -1;
2411}
2412
2413__forceinline static void
2414__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2415{
2416 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2417 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2418 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2419 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2420 TCR_PTR(lck->lk.polls); // volatile load
2421
2422#ifdef USE_LOCK_PROFILE
2423 if (TCR_8(polls[ticket & mask].poll) != ticket)
2424 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2425 /* else __kmp_printf( "." );*/
2426#endif /* USE_LOCK_PROFILE */
2427
2428 //
2429 // Now spin-wait, but reload the polls pointer and mask, in case the
2430 // polling area has been reconfigured. Unless it is reconfigured, the
2431 // reloads stay in L1 cache and are cheap.
2432 //
2433 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2434 //
2435 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2436 // and poll to be re-read every spin iteration.
2437 //
2438 kmp_uint32 spins;
2439
2440 KMP_FSYNC_PREPARE(lck);
2441 KMP_INIT_YIELD(spins);
2442 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002443 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002444 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002445 // CPU Pause is in the macros for yield.
2446 //
2447 KMP_YIELD(TCR_4(__kmp_nth)
2448 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2449 KMP_YIELD_SPIN(spins);
2450
2451 // Re-read the mask and the poll pointer from the lock structure.
2452 //
2453 // Make certain that "mask" is read before "polls" !!!
2454 //
2455        // If another thread reconfigures the polling area and updates
2456 // their values, and we get the new value of mask and the old polls
2457 // pointer, we could access memory beyond the end of the old polling
2458 // area.
2459 //
2460 mask = TCR_8(lck->lk.mask); // volatile load
2461 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2462 TCR_PTR(lck->lk.polls); // volatile load
2463 }
2464
2465 //
2466 // Critical section starts here
2467 //
2468 KMP_FSYNC_ACQUIRED(lck);
2469 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2470 ticket, lck));
2471 lck->lk.now_serving = ticket; // non-volatile store
2472
2473 //
2474 // Deallocate a garbage polling area if we know that we are the last
2475 // thread that could possibly access it.
2476 //
2477 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2478 // ticket.
2479 //
2480 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2481 __kmp_free((void *)lck->lk.old_polls);
2482 lck->lk.old_polls = NULL;
2483 lck->lk.cleanup_ticket = 0;
2484 }
2485
2486 //
2487 // Check to see if we should reconfigure the polling area.
2488 // If there is still a garbage polling area to be deallocated from a
2489 // previous reconfiguration, let a later thread reconfigure it.
2490 //
2491 if (lck->lk.old_polls == NULL) {
2492 bool reconfigure = false;
2493 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2494 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2495
2496 if (TCR_4(__kmp_nth)
2497 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2498 //
2499 // We are in oversubscription mode. Contract the polling area
2500 // down to a single location, if that hasn't been done already.
2501 //
2502 if (num_polls > 1) {
2503 reconfigure = true;
2504 num_polls = TCR_4(lck->lk.num_polls);
2505 mask = 0;
2506 num_polls = 1;
2507 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2508 __kmp_allocate(num_polls * sizeof(*polls));
2509 polls[0].poll = ticket;
2510 }
2511 }
2512 else {
2513 //
2514 // We are in under/fully subscribed mode. Check the number of
2515 // threads waiting on the lock. The size of the polling area
2516 // should be at least the number of threads waiting.
2517 //
2518 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2519 if (num_waiting > num_polls) {
2520 kmp_uint32 old_num_polls = num_polls;
2521 reconfigure = true;
2522 do {
2523 mask = (mask << 1) | 1;
2524 num_polls *= 2;
2525 } while (num_polls <= num_waiting);
2526
2527 //
2528 // Allocate the new polling area, and copy the relevant portion
2529 // of the old polling area to the new area. __kmp_allocate()
2530 // zeroes the memory it allocates, and most of the old area is
2531 // just zero padding, so we only copy the release counters.
2532 //
2533 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2534 __kmp_allocate(num_polls * sizeof(*polls));
2535 kmp_uint32 i;
2536 for (i = 0; i < old_num_polls; i++) {
2537 polls[i].poll = old_polls[i].poll;
2538 }
2539 }
2540 }
2541
2542 if (reconfigure) {
2543 //
2544 // Now write the updated fields back to the lock structure.
2545 //
2546 // Make certain that "polls" is written before "mask" !!!
2547 //
2548 // If another thread picks up the new value of mask and the old
2549            // polls pointer, it could access memory beyond the end of the
2550 // old polling area.
2551 //
2552 // On x86, we need memory fences.
2553 //
2554 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2555 ticket, lck, num_polls));
2556
2557 lck->lk.old_polls = old_polls; // non-volatile store
2558 lck->lk.polls = polls; // volatile store
2559
2560 KMP_MB();
2561
2562 lck->lk.num_polls = num_polls; // non-volatile store
2563 lck->lk.mask = mask; // volatile store
2564
2565 KMP_MB();
2566
2567 //
2568 // Only after the new polling area and mask have been flushed
2569 // to main memory can we update the cleanup ticket field.
2570 //
2571 // volatile load / non-volatile store
2572 //
2573 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2574 }
2575 }
2576}
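// Sizing example (illustrative): if num_polls == 4 (mask == 3) and 9 threads
// are waiting, the doubling loop above runs twice, leaving num_polls == 16 and
// mask == 15, so every outstanding ticket maps to a distinct slot of the new
// polling area.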
2577
2578void
2579__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2580{
2581 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2582}
2583
2584static void
2585__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2586{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002587 char const * const func = "omp_set_lock";
2588 if ( lck->lk.initialized != lck ) {
2589 KMP_FATAL( LockIsUninitialized, func );
2590 }
2591 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2592 KMP_FATAL( LockNestableUsedAsSimple, func );
2593 }
2594 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2595 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596 }
2597
2598 __kmp_acquire_drdpa_lock( lck, gtid );
2599
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002600 lck->lk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601}
2602
2603int
2604__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2605{
2606 //
2607 // First get a ticket, then read the polls pointer and the mask.
2608 // The polls pointer must be read before the mask!!! (See above)
2609 //
2610 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2611 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2612 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2613 TCR_PTR(lck->lk.polls); // volatile load
2614 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2615 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2616 kmp_uint64 next_ticket = ticket + 1;
2617 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2618 ticket, next_ticket)) {
2619 KMP_FSYNC_ACQUIRED(lck);
2620 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2621 ticket, lck));
2622 lck->lk.now_serving = ticket; // non-volatile store
2623
2624 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002625 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 // we would want to reconfigure the polling area. We might
2627 // have the cleanup ticket value (which says that it is now
2628 // safe to deallocate old_polls), but we'll let a later thread
2629 // which calls __kmp_acquire_lock do that - this routine
2630 // isn't supposed to block, and we would risk blocks if we
2631 // called __kmp_free() to do the deallocation.
2632 //
2633 return TRUE;
2634 }
2635 }
2636 return FALSE;
2637}
2638
2639static int
2640__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2641{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002642 char const * const func = "omp_test_lock";
2643 if ( lck->lk.initialized != lck ) {
2644 KMP_FATAL( LockIsUninitialized, func );
2645 }
2646 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2647 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648 }
2649
2650 int retval = __kmp_test_drdpa_lock( lck, gtid );
2651
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002652 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002653 lck->lk.owner_id = gtid + 1;
2654 }
2655 return retval;
2656}
2657
2658void
2659__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2660{
2661 //
2662 // Read the ticket value from the lock data struct, then the polls
2663 // pointer and the mask. The polls pointer must be read before the
2664 // mask!!! (See above)
2665 //
2666 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2667 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2668 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2669 TCR_PTR(lck->lk.polls); // volatile load
2670 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2671 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2672 ticket - 1, lck));
2673 KMP_FSYNC_RELEASING(lck);
2674 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2675}
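// Hand-off example (illustrative): with mask == 3, the thread that drew
// ticket 6 spins on polls[6 & 3].poll. When the owner of ticket 5 releases,
// it computes ticket = now_serving + 1 == 6 and stores 6 into polls[6 & 3],
// which is exactly the slot that ticket 6 is polling, so that thread proceeds.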
2676
2677static void
2678__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002680 char const * const func = "omp_unset_lock";
2681 KMP_MB(); /* in case another processor initialized lock */
2682 if ( lck->lk.initialized != lck ) {
2683 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002685 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2686 KMP_FATAL( LockNestableUsedAsSimple, func );
2687 }
2688 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2689 KMP_FATAL( LockUnsettingFree, func );
2690 }
2691 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2692 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2693 KMP_FATAL( LockUnsettingSetByAnother, func );
2694 }
2695 lck->lk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002696 __kmp_release_drdpa_lock( lck, gtid );
2697}
2698
2699void
2700__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2701{
2702 lck->lk.location = NULL;
2703 lck->lk.mask = 0;
2704 lck->lk.num_polls = 1;
2705 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2706 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2707 lck->lk.cleanup_ticket = 0;
2708 lck->lk.old_polls = NULL;
2709 lck->lk.next_ticket = 0;
2710 lck->lk.now_serving = 0;
2711 lck->lk.owner_id = 0; // no thread owns the lock.
2712 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2713 lck->lk.initialized = lck;
2714
2715 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2716}
2717
2718static void
2719__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2720{
2721 __kmp_init_drdpa_lock( lck );
2722}
2723
2724void
2725__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2726{
2727 lck->lk.initialized = NULL;
2728 lck->lk.location = NULL;
2729 if (lck->lk.polls != NULL) {
2730 __kmp_free((void *)lck->lk.polls);
2731 lck->lk.polls = NULL;
2732 }
2733 if (lck->lk.old_polls != NULL) {
2734 __kmp_free((void *)lck->lk.old_polls);
2735 lck->lk.old_polls = NULL;
2736 }
2737 lck->lk.mask = 0;
2738 lck->lk.num_polls = 0;
2739 lck->lk.cleanup_ticket = 0;
2740 lck->lk.next_ticket = 0;
2741 lck->lk.now_serving = 0;
2742 lck->lk.owner_id = 0;
2743 lck->lk.depth_locked = -1;
2744}
2745
2746static void
2747__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2748{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002749 char const * const func = "omp_destroy_lock";
2750 if ( lck->lk.initialized != lck ) {
2751 KMP_FATAL( LockIsUninitialized, func );
2752 }
2753 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2754 KMP_FATAL( LockNestableUsedAsSimple, func );
2755 }
2756 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2757 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758 }
2759 __kmp_destroy_drdpa_lock( lck );
2760}
2761
2762
2763//
2764// nested drdpa ticket locks
2765//
2766
2767void
2768__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2769{
2770 KMP_DEBUG_ASSERT( gtid >= 0 );
2771
2772 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2773 lck->lk.depth_locked += 1;
2774 }
2775 else {
2776 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2777 KMP_MB();
2778 lck->lk.depth_locked = 1;
2779 KMP_MB();
2780 lck->lk.owner_id = gtid + 1;
2781 }
2782}
2783
2784static void
2785__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2786{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002787 char const * const func = "omp_set_nest_lock";
2788 if ( lck->lk.initialized != lck ) {
2789 KMP_FATAL( LockIsUninitialized, func );
2790 }
2791 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2792 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002793 }
2794 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2795}
2796
2797int
2798__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2799{
2800 int retval;
2801
2802 KMP_DEBUG_ASSERT( gtid >= 0 );
2803
2804 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2805 retval = ++lck->lk.depth_locked;
2806 }
2807 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2808 retval = 0;
2809 }
2810 else {
2811 KMP_MB();
2812 retval = lck->lk.depth_locked = 1;
2813 KMP_MB();
2814 lck->lk.owner_id = gtid + 1;
2815 }
2816 return retval;
2817}
2818
2819static int
2820__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2821{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002822 char const * const func = "omp_test_nest_lock";
2823 if ( lck->lk.initialized != lck ) {
2824 KMP_FATAL( LockIsUninitialized, func );
2825 }
2826 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2827 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002828 }
2829 return __kmp_test_nested_drdpa_lock( lck, gtid );
2830}
2831
2832void
2833__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2834{
2835 KMP_DEBUG_ASSERT( gtid >= 0 );
2836
2837 KMP_MB();
2838 if ( --(lck->lk.depth_locked) == 0 ) {
2839 KMP_MB();
2840 lck->lk.owner_id = 0;
2841 __kmp_release_drdpa_lock( lck, gtid );
2842 }
2843}
2844
2845static void
2846__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2847{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002848 char const * const func = "omp_unset_nest_lock";
2849 KMP_MB(); /* in case another processor initialized lock */
2850 if ( lck->lk.initialized != lck ) {
2851 KMP_FATAL( LockIsUninitialized, func );
2852 }
2853 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2854 KMP_FATAL( LockSimpleUsedAsNestable, func );
2855 }
2856 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2857 KMP_FATAL( LockUnsettingFree, func );
2858 }
2859 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2860 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002861 }
2862 __kmp_release_nested_drdpa_lock( lck, gtid );
2863}
2864
2865void
2866__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2867{
2868 __kmp_init_drdpa_lock( lck );
2869 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2870}
2871
2872static void
2873__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2874{
2875 __kmp_init_nested_drdpa_lock( lck );
2876}
2877
2878void
2879__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2880{
2881 __kmp_destroy_drdpa_lock( lck );
2882 lck->lk.depth_locked = 0;
2883}
2884
2885static void
2886__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2887{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002888 char const * const func = "omp_destroy_nest_lock";
2889 if ( lck->lk.initialized != lck ) {
2890 KMP_FATAL( LockIsUninitialized, func );
2891 }
2892 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2893 KMP_FATAL( LockSimpleUsedAsNestable, func );
2894 }
2895 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2896 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897 }
2898 __kmp_destroy_nested_drdpa_lock( lck );
2899}
2900
2901
2902//
2903// access functions to fields which don't exist for all lock kinds.
2904//
2905
2906static int
2907__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2908{
2909 return lck == lck->lk.initialized;
2910}
2911
2912static const ident_t *
2913__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2914{
2915 return lck->lk.location;
2916}
2917
2918static void
2919__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2920{
2921 lck->lk.location = loc;
2922}
2923
2924static kmp_lock_flags_t
2925__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2926{
2927 return lck->lk.flags;
2928}
2929
2930static void
2931__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2932{
2933 lck->lk.flags = flags;
2934}
2935
2936/* ------------------------------------------------------------------------ */
2937/* user locks
2938 *
2939 * They are implemented as a table of function pointers which are set to the
2940 * lock functions of the appropriate kind, once that has been determined.
2941 */
2942
2943enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
2944
2945size_t __kmp_base_user_lock_size = 0;
2946size_t __kmp_user_lock_size = 0;
2947
2948kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
2949void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2950
2951int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2952void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2953void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2954void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
2955void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2956void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2957
2958int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2959void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2960void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2961void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2962
2963int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
2964const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
2965void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
2966kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
2967void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
2968
2969void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
2970{
2971 switch ( user_lock_kind ) {
2972 case lk_default:
2973 default:
2974 KMP_ASSERT( 0 );
2975
2976 case lk_tas: {
2977 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
2978 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
2979
2980 __kmp_get_user_lock_owner_ =
2981 ( kmp_int32 ( * )( kmp_user_lock_p ) )
2982 ( &__kmp_get_tas_lock_owner );
2983
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002984 if ( __kmp_env_consistency_check ) {
2985 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
2986 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
2987 }
2988 else {
2989 KMP_BIND_USER_LOCK(tas);
2990 KMP_BIND_NESTED_USER_LOCK(tas);
2991 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002992
2993 __kmp_destroy_user_lock_ =
2994 ( void ( * )( kmp_user_lock_p ) )
2995 ( &__kmp_destroy_tas_lock );
2996
Jim Cownie5e8470a2013-09-27 10:38:44 +00002997 __kmp_is_user_lock_initialized_ =
2998 ( int ( * )( kmp_user_lock_p ) ) NULL;
2999
3000 __kmp_get_user_lock_location_ =
3001 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3002
3003 __kmp_set_user_lock_location_ =
3004 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3005
3006 __kmp_get_user_lock_flags_ =
3007 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3008
3009 __kmp_set_user_lock_flags_ =
3010 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3011 }
3012 break;
3013
Jim Cownie181b4bb2013-12-23 17:28:57 +00003014#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003015
3016 case lk_futex: {
3017 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3018 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3019
3020 __kmp_get_user_lock_owner_ =
3021 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3022 ( &__kmp_get_futex_lock_owner );
3023
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003024 if ( __kmp_env_consistency_check ) {
3025 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3026 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3027 }
3028 else {
3029 KMP_BIND_USER_LOCK(futex);
3030 KMP_BIND_NESTED_USER_LOCK(futex);
3031 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003032
3033 __kmp_destroy_user_lock_ =
3034 ( void ( * )( kmp_user_lock_p ) )
3035 ( &__kmp_destroy_futex_lock );
3036
Jim Cownie5e8470a2013-09-27 10:38:44 +00003037 __kmp_is_user_lock_initialized_ =
3038 ( int ( * )( kmp_user_lock_p ) ) NULL;
3039
3040 __kmp_get_user_lock_location_ =
3041 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3042
3043 __kmp_set_user_lock_location_ =
3044 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3045
3046 __kmp_get_user_lock_flags_ =
3047 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3048
3049 __kmp_set_user_lock_flags_ =
3050 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3051 }
3052 break;
3053
Jim Cownie181b4bb2013-12-23 17:28:57 +00003054#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003055
3056 case lk_ticket: {
3057 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3058 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3059
3060 __kmp_get_user_lock_owner_ =
3061 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3062 ( &__kmp_get_ticket_lock_owner );
3063
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003064 if ( __kmp_env_consistency_check ) {
3065 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3066 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3067 }
3068 else {
3069 KMP_BIND_USER_LOCK(ticket);
3070 KMP_BIND_NESTED_USER_LOCK(ticket);
3071 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003072
3073 __kmp_destroy_user_lock_ =
3074 ( void ( * )( kmp_user_lock_p ) )
3075 ( &__kmp_destroy_ticket_lock );
3076
Jim Cownie5e8470a2013-09-27 10:38:44 +00003077 __kmp_is_user_lock_initialized_ =
3078 ( int ( * )( kmp_user_lock_p ) )
3079 ( &__kmp_is_ticket_lock_initialized );
3080
3081 __kmp_get_user_lock_location_ =
3082 ( const ident_t * ( * )( kmp_user_lock_p ) )
3083 ( &__kmp_get_ticket_lock_location );
3084
3085 __kmp_set_user_lock_location_ =
3086 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3087 ( &__kmp_set_ticket_lock_location );
3088
3089 __kmp_get_user_lock_flags_ =
3090 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3091 ( &__kmp_get_ticket_lock_flags );
3092
3093 __kmp_set_user_lock_flags_ =
3094 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3095 ( &__kmp_set_ticket_lock_flags );
3096 }
3097 break;
3098
3099 case lk_queuing: {
3100 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3101 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3102
3103 __kmp_get_user_lock_owner_ =
3104 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3105 ( &__kmp_get_queuing_lock_owner );
3106
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003107 if ( __kmp_env_consistency_check ) {
3108 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3109 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3110 }
3111 else {
3112 KMP_BIND_USER_LOCK(queuing);
3113 KMP_BIND_NESTED_USER_LOCK(queuing);
3114 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003115
3116 __kmp_destroy_user_lock_ =
3117 ( void ( * )( kmp_user_lock_p ) )
3118 ( &__kmp_destroy_queuing_lock );
3119
Jim Cownie5e8470a2013-09-27 10:38:44 +00003120 __kmp_is_user_lock_initialized_ =
3121 ( int ( * )( kmp_user_lock_p ) )
3122 ( &__kmp_is_queuing_lock_initialized );
3123
3124 __kmp_get_user_lock_location_ =
3125 ( const ident_t * ( * )( kmp_user_lock_p ) )
3126 ( &__kmp_get_queuing_lock_location );
3127
3128 __kmp_set_user_lock_location_ =
3129 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3130 ( &__kmp_set_queuing_lock_location );
3131
3132 __kmp_get_user_lock_flags_ =
3133 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3134 ( &__kmp_get_queuing_lock_flags );
3135
3136 __kmp_set_user_lock_flags_ =
3137 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3138 ( &__kmp_set_queuing_lock_flags );
3139 }
3140 break;
3141
3142#if KMP_USE_ADAPTIVE_LOCKS
3143 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003144 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3145 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003146
3147 __kmp_get_user_lock_owner_ =
3148 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3149 ( &__kmp_get_queuing_lock_owner );
3150
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003151 if ( __kmp_env_consistency_check ) {
3152 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3153 }
3154 else {
3155 KMP_BIND_USER_LOCK(adaptive);
3156 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003157
3158 __kmp_destroy_user_lock_ =
3159 ( void ( * )( kmp_user_lock_p ) )
3160 ( &__kmp_destroy_adaptive_lock );
3161
3162 __kmp_is_user_lock_initialized_ =
3163 ( int ( * )( kmp_user_lock_p ) )
3164 ( &__kmp_is_queuing_lock_initialized );
3165
3166 __kmp_get_user_lock_location_ =
3167 ( const ident_t * ( * )( kmp_user_lock_p ) )
3168 ( &__kmp_get_queuing_lock_location );
3169
3170 __kmp_set_user_lock_location_ =
3171 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3172 ( &__kmp_set_queuing_lock_location );
3173
3174 __kmp_get_user_lock_flags_ =
3175 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3176 ( &__kmp_get_queuing_lock_flags );
3177
3178 __kmp_set_user_lock_flags_ =
3179 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3180 ( &__kmp_set_queuing_lock_flags );
3181
3182 }
3183 break;
3184#endif // KMP_USE_ADAPTIVE_LOCKS
3185
3186 case lk_drdpa: {
3187 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3188 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3189
3190 __kmp_get_user_lock_owner_ =
3191 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3192 ( &__kmp_get_drdpa_lock_owner );
3193
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003194 if ( __kmp_env_consistency_check ) {
3195 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3196 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3197 }
3198 else {
3199 KMP_BIND_USER_LOCK(drdpa);
3200 KMP_BIND_NESTED_USER_LOCK(drdpa);
3201 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003202
3203 __kmp_destroy_user_lock_ =
3204 ( void ( * )( kmp_user_lock_p ) )
3205 ( &__kmp_destroy_drdpa_lock );
3206
Jim Cownie5e8470a2013-09-27 10:38:44 +00003207 __kmp_is_user_lock_initialized_ =
3208 ( int ( * )( kmp_user_lock_p ) )
3209 ( &__kmp_is_drdpa_lock_initialized );
3210
3211 __kmp_get_user_lock_location_ =
3212 ( const ident_t * ( * )( kmp_user_lock_p ) )
3213 ( &__kmp_get_drdpa_lock_location );
3214
3215 __kmp_set_user_lock_location_ =
3216 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3217 ( &__kmp_set_drdpa_lock_location );
3218
3219 __kmp_get_user_lock_flags_ =
3220 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3221 ( &__kmp_get_drdpa_lock_flags );
3222
3223 __kmp_set_user_lock_flags_ =
3224 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3225 ( &__kmp_set_drdpa_lock_flags );
3226 }
3227 break;
3228 }
3229}
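// Usage sketch (illustrative): the runtime selects a lock kind once during
// initialization (e.g. from the KMP_LOCK_KIND setting) and calls, say,
// __kmp_set_user_lock_vptrs( lk_queuing ); from then on the user-lock entry
// points dispatch through the function pointers bound above.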
3230
3231
3232// ----------------------------------------------------------------------------
3233// User lock table & lock allocation
3234
3235kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3236kmp_user_lock_p __kmp_lock_pool = NULL;
3237
3238// Lock block-allocation support.
3239kmp_block_of_locks* __kmp_lock_blocks = NULL;
3240int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3241
3242static kmp_lock_index_t
3243__kmp_lock_table_insert( kmp_user_lock_p lck )
3244{
3245 // Assume that kmp_global_lock is held upon entry/exit.
3246 kmp_lock_index_t index;
3247 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3248 kmp_lock_index_t size;
3249 kmp_user_lock_p *table;
3250 kmp_lock_index_t i;
3251 // Reallocate lock table.
3252 if ( __kmp_user_lock_table.allocated == 0 ) {
3253 size = 1024;
3254 }
3255 else {
3256 size = __kmp_user_lock_table.allocated * 2;
3257 }
3258 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3259 memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3260 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3261        // We cannot free the previous table now, since it may be in use by other
3262        // threads. So we save the pointer to the previous table in the first element of the
3263        // new table. All the tables are thus organized into a list, and can be freed when
3264        // the library is shutting down.
3265 __kmp_user_lock_table.table = table;
3266 __kmp_user_lock_table.allocated = size;
3267 }
3268 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3269 index = __kmp_user_lock_table.used;
3270 __kmp_user_lock_table.table[ index ] = lck;
3271 ++ __kmp_user_lock_table.used;
3272 return index;
3273}
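// Growth example (illustrative): after the table has doubled twice, the live
// table has 4096 slots and its element [ 0 ] points to the previous 2048-slot
// table, whose element [ 0 ] points to the original 1024-slot table, whose
// element [ 0 ] is NULL; the whole chain can be walked and freed at shutdown.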
3274
3275static kmp_user_lock_p
3276__kmp_lock_block_allocate()
3277{
3278 // Assume that kmp_global_lock is held upon entry/exit.
3279 static int last_index = 0;
3280 if ( ( last_index >= __kmp_num_locks_in_block )
3281 || ( __kmp_lock_blocks == NULL ) ) {
3282 // Restart the index.
3283 last_index = 0;
3284 // Need to allocate a new block.
3285 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3286 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3287 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3288 // Set up the new block.
3289 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3290 new_block->next_block = __kmp_lock_blocks;
3291 new_block->locks = (void *)buffer;
3292 // Publish the new block.
3293 KMP_MB();
3294 __kmp_lock_blocks = new_block;
3295 }
3296 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3297 [ last_index * __kmp_user_lock_size ] ) );
3298 last_index++;
3299 return ret;
3300}
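// Layout note (illustrative): each block is a single allocation holding
// __kmp_num_locks_in_block lock objects followed by its kmp_block_of_locks
// header, and new_block->locks points back at the start of that allocation,
// so one __kmp_free() of that pointer would release locks and header together.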
3301
3302//
3303// Get memory for a lock. It may be freshly allocated memory or reused memory
3304// from lock pool.
3305//
3306kmp_user_lock_p
3307__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3308 kmp_lock_flags_t flags )
3309{
3310 kmp_user_lock_p lck;
3311 kmp_lock_index_t index;
3312 KMP_DEBUG_ASSERT( user_lock );
3313
3314 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3315
3316 if ( __kmp_lock_pool == NULL ) {
3317 // Lock pool is empty. Allocate new memory.
3318 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3319 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3320 }
3321 else {
3322 lck = __kmp_lock_block_allocate();
3323 }
3324
3325 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3326 // and debugger has info on all allocated locks.
3327 index = __kmp_lock_table_insert( lck );
3328 }
3329 else {
3330 // Pick up lock from pool.
3331 lck = __kmp_lock_pool;
3332 index = __kmp_lock_pool->pool.index;
3333 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3334 }
3335
3336 //
3337 // We could potentially differentiate between nested and regular locks
3338 // here, and do the lock table lookup for regular locks only.
3339 //
3340 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3341 * ( (kmp_lock_index_t *) user_lock ) = index;
3342 }
3343 else {
3344 * ( (kmp_user_lock_p *) user_lock ) = lck;
3345 }
3346
3347 // mark the lock if it is critical section lock.
3348 __kmp_set_user_lock_flags( lck, flags );
3349
3350 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
3351
3352 return lck;
3353}
3354
3355// Put lock's memory to pool for reusing.
3356void
3357__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3358{
3359 kmp_lock_pool_t * lock_pool;
3360
3361 KMP_DEBUG_ASSERT( user_lock != NULL );
3362 KMP_DEBUG_ASSERT( lck != NULL );
3363
3364 __kmp_acquire_lock( & __kmp_global_lock, gtid );
3365
3366 lck->pool.next = __kmp_lock_pool;
3367 __kmp_lock_pool = lck;
3368 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3369 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3370 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3371 lck->pool.index = index;
3372 }
3373
3374 __kmp_release_lock( & __kmp_global_lock, gtid );
3375}
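// Note (illustrative): freed locks form a LIFO pool threaded through
// pool.next; when OMP_LOCK_T_SIZE < sizeof(void *), the table index is kept in
// pool.index so that __kmp_user_lock_allocate() can hand the same index back
// to user code when the lock object is reused.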

kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}

void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset the lock pool. Do not worry about the locks in the pool -- we
    // will free them when iterating through the lock table (it includes
    // all of the locks, dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
          ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through the lock table and free all of the locks.
    //
    // Do not free item [0]; it is reserved for the lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen, as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    // (See the illustrative sketch after this function for the aliasing
    // described above.)
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the locks stays consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is
            // initialized AND it is NOT a critical section (the user is not
            // responsible for destroying criticals) AND we know the source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
                  str_loc.line, str_loc.col );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Clean up the lock's internal dynamic resources
            // (for drdpa locks in particular).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // Delete the lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free the buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}
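
//
// A final sketch (under "#if 0", illustration only) of the aliasing that the
// FIXME comment in __kmp_cleanup_user_locks() relies on.  The assumption is
// the one stated there: for the non-"tas" user lock types, the first word of
// a live, initialized lock holds the lock's own address, whereas for an
// entry parked on __kmp_lock_pool the same word holds pool.next.  The name
// "classify_table_entry_sketch" is invented for this example.
//
#if 0
static void
classify_table_entry_sketch( kmp_user_lock_p lck )
{
    if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
      ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
        // Live lock: its location field is valid and its internal
        // resources can be released.
        __kmp_destroy_user_lock( lck );
    }
    else {
        // Pool entry (or an uninitialized lock): the first word is the
        // pool.next link, so the cleanup loop must not touch the location
        // field and only frees the raw memory (and only when block
        // allocation is not in use).
    }
}
#endif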