1/*
2 * kmp_lock.cpp -- lock-related functions
3 * $Revision: 42810 $
4 * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
5 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include <stddef.h>
19
20#include "kmp.h"
21#include "kmp_itt.h"
22#include "kmp_i18n.h"
23#include "kmp_lock.h"
24#include "kmp_io.h"
25
26#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
27# include <unistd.h>
28# include <sys/syscall.h>
29// We should really include <futex.h>, but that causes compatibility problems on different
30// Linux* OS distributions that either require you to include (or break when you try to include)
31// <pci/types.h>.
32// Since all we need is the two macros below (which are part of the kernel ABI, so can't change),
33// we just define the constants here and don't include <futex.h>.
34# ifndef FUTEX_WAIT
35# define FUTEX_WAIT 0
36# endif
37# ifndef FUTEX_WAKE
38# define FUTEX_WAKE 1
39# endif
40#endif
41
42
43#ifndef KMP_DEBUG
44# define __kmp_static_delay( arg ) /* nothing to do */
45#else
46
47static void
48__kmp_static_delay( int arg )
49{
50/* Work around weird code-gen bug that causes assert to trip */
51# if KMP_ARCH_X86_64 && KMP_OS_LINUX
52 KMP_ASSERT( arg != 0 );
53# else
54 KMP_ASSERT( arg >= 0 );
55# endif
56}
57#endif /* KMP_DEBUG */
58
59static void
60__kmp_static_yield( int arg )
61{
62 __kmp_yield( arg );
63}
64
65/* Implement spin locks for internal library use. */
66/* The algorithm implemented is Lamport's bakery lock [1974]. */
67
68void
69__kmp_validate_locks( void )
70{
71 int i;
72 kmp_uint32 x, y;
73
74    /* Check to make sure unsigned arithmetic wraps properly */
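    /* (The ticket lock below relies on this: its release code computes
       next_ticket - now_serving with unsigned wrap-around.) */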
75 x = ~((kmp_uint32) 0) - 2;
76 y = x - 2;
77
78 for (i = 0; i < 8; ++i, ++x, ++y) {
79 kmp_uint32 z = (x - y);
80 KMP_ASSERT( z == 2 );
81 }
82
83 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
84}
85
86
87/* ------------------------------------------------------------------------ */
88/* test and set locks */
89
90//
91// For the non-nested locks, we can only assume that the first 4 bytes were
92// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
93// compiler only allocates a 4 byte pointer on IA-32 architecture. On
94// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
95//
96// gcc reserves >= 8 bytes for nested locks, so we can assume that the
97// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
98//
99
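//
// lk.poll encoding for test-and-set locks: 0 means the lock is free;
// otherwise it holds gtid + 1 of the owning thread, so the owner is poll - 1.
//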
100static kmp_int32
101__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
102{
103 return TCR_4( lck->lk.poll ) - 1;
104}
105
106static inline bool
107__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
108{
109 return lck->lk.depth_locked != -1;
110}
111
112__forceinline static void
113__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
114{
115 KMP_MB();
116
117#ifdef USE_LOCK_PROFILE
118 kmp_uint32 curr = TCR_4( lck->lk.poll );
119 if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
120 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
121 /* else __kmp_printf( "." );*/
122#endif /* USE_LOCK_PROFILE */
123
124 if ( ( lck->lk.poll == 0 )
125 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
126 KMP_FSYNC_ACQUIRED(lck);
127 return;
128 }
129
130 kmp_uint32 spins;
131 KMP_FSYNC_PREPARE( lck );
132 KMP_INIT_YIELD( spins );
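    // Yield only when the system is oversubscribed (more threads than
    // available processors); otherwise spin via KMP_YIELD_SPIN.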
133 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
134 __kmp_xproc ) ) {
135 KMP_YIELD( TRUE );
136 }
137 else {
138 KMP_YIELD_SPIN( spins );
139 }
140
141 while ( ( lck->lk.poll != 0 ) ||
142 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
143 //
144 // FIXME - use exponential backoff here
145 //
146 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
147 __kmp_xproc ) ) {
148 KMP_YIELD( TRUE );
149 }
150 else {
151 KMP_YIELD_SPIN( spins );
152 }
153 }
154 KMP_FSYNC_ACQUIRED( lck );
155}
156
157void
158__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
159{
160 __kmp_acquire_tas_lock_timed_template( lck, gtid );
161}
162
163static void
164__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
165{
166 if ( __kmp_env_consistency_check ) {
167 char const * const func = "omp_set_lock";
168 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169 && __kmp_is_tas_lock_nestable( lck ) ) {
170 KMP_FATAL( LockNestableUsedAsSimple, func );
171 }
172 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
173 KMP_FATAL( LockIsAlreadyOwned, func );
174 }
175 }
176 __kmp_acquire_tas_lock( lck, gtid );
177}
178
179int
180__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
181{
182 if ( ( lck->lk.poll == 0 )
183 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
184 KMP_FSYNC_ACQUIRED( lck );
185 return TRUE;
186 }
187 return FALSE;
188}
189
190static int
191__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
192{
193 if ( __kmp_env_consistency_check ) {
194 char const * const func = "omp_test_lock";
195 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
196 && __kmp_is_tas_lock_nestable( lck ) ) {
197 KMP_FATAL( LockNestableUsedAsSimple, func );
198 }
199 }
200 return __kmp_test_tas_lock( lck, gtid );
201}
202
203void
204__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
205{
206 KMP_MB(); /* Flush all pending memory write invalidates. */
207
208 KMP_FSYNC_RELEASING(lck);
209 KMP_ST_REL32( &(lck->lk.poll), 0 );
210
211 KMP_MB(); /* Flush all pending memory write invalidates. */
212
213 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
214 __kmp_xproc ) );
215}
216
217static void
218__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
219{
220 if ( __kmp_env_consistency_check ) {
221 char const * const func = "omp_unset_lock";
222 KMP_MB(); /* in case another processor initialized lock */
223 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
224 && __kmp_is_tas_lock_nestable( lck ) ) {
225 KMP_FATAL( LockNestableUsedAsSimple, func );
226 }
227 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
228 KMP_FATAL( LockUnsettingFree, func );
229 }
230 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
231 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
232 KMP_FATAL( LockUnsettingSetByAnother, func );
233 }
234 }
235 __kmp_release_tas_lock( lck, gtid );
236}
237
238void
239__kmp_init_tas_lock( kmp_tas_lock_t * lck )
240{
241 TCW_4( lck->lk.poll, 0 );
242}
243
244static void
245__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
246{
247 __kmp_init_tas_lock( lck );
248}
249
250void
251__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
252{
253 lck->lk.poll = 0;
254}
255
256static void
257__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
258{
259 if ( __kmp_env_consistency_check ) {
260 char const * const func = "omp_destroy_lock";
261 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
262 && __kmp_is_tas_lock_nestable( lck ) ) {
263 KMP_FATAL( LockNestableUsedAsSimple, func );
264 }
265 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
266 KMP_FATAL( LockStillOwned, func );
267 }
268 }
269 __kmp_destroy_tas_lock( lck );
270}
271
272
273//
274// nested test and set locks
275//
276
277void
278__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
279{
280 KMP_DEBUG_ASSERT( gtid >= 0 );
281
282 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
283 lck->lk.depth_locked += 1;
284 }
285 else {
286 __kmp_acquire_tas_lock_timed_template( lck, gtid );
287 lck->lk.depth_locked = 1;
288 }
289}
290
291static void
292__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293{
294 if ( __kmp_env_consistency_check ) {
295 char const * const func = "omp_set_nest_lock";
296 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
297 KMP_FATAL( LockSimpleUsedAsNestable, func );
298 }
299 }
300 __kmp_acquire_nested_tas_lock( lck, gtid );
301}
302
303int
304__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
305{
306 int retval;
307
308 KMP_DEBUG_ASSERT( gtid >= 0 );
309
310 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
311 retval = ++lck->lk.depth_locked;
312 }
313 else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
314 retval = 0;
315 }
316 else {
317 KMP_MB();
318 retval = lck->lk.depth_locked = 1;
319 }
320 return retval;
321}
322
323static int
324__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
325{
326 if ( __kmp_env_consistency_check ) {
327 char const * const func = "omp_test_nest_lock";
328 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
329 KMP_FATAL( LockSimpleUsedAsNestable, func );
330 }
331 }
332 return __kmp_test_nested_tas_lock( lck, gtid );
333}
334
335void
336__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
337{
338 KMP_DEBUG_ASSERT( gtid >= 0 );
339
340 KMP_MB();
341 if ( --(lck->lk.depth_locked) == 0 ) {
342 __kmp_release_tas_lock( lck, gtid );
343 }
344}
345
346static void
347__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
348{
349 if ( __kmp_env_consistency_check ) {
350 char const * const func = "omp_unset_nest_lock";
351 KMP_MB(); /* in case another processor initialized lock */
352 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
353 KMP_FATAL( LockSimpleUsedAsNestable, func );
354 }
355 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
356 KMP_FATAL( LockUnsettingFree, func );
357 }
358 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
359 KMP_FATAL( LockUnsettingSetByAnother, func );
360 }
361 }
362 __kmp_release_nested_tas_lock( lck, gtid );
363}
364
365void
366__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
367{
368 __kmp_init_tas_lock( lck );
369 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
370}
371
372static void
373__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
374{
375 __kmp_init_nested_tas_lock( lck );
376}
377
378void
379__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
380{
381 __kmp_destroy_tas_lock( lck );
382 lck->lk.depth_locked = 0;
383}
384
385static void
386__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
387{
388 if ( __kmp_env_consistency_check ) {
389 char const * const func = "omp_destroy_nest_lock";
390 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
391 KMP_FATAL( LockSimpleUsedAsNestable, func );
392 }
393 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
394 KMP_FATAL( LockStillOwned, func );
395 }
396 }
397 __kmp_destroy_nested_tas_lock( lck );
398}
399
400
401#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
402
403/* ------------------------------------------------------------------------ */
404/* futex locks */
405
406// futex locks are really just test and set locks, with a different method
407// of handling contention. They take the same amount of space as test and
408// set locks, and are allocated the same way (i.e. use the area allocated by
409// the compiler for non-nested locks / allocate nested locks on the heap).
410
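//
// lk.poll encoding for futex locks: the upper bits hold gtid + 1 of the owner
// (0 when the lock is free); bit 0 is set when other threads may be sleeping
// in futex_wait, so the releasing thread must issue a futex_wake.
//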
411static kmp_int32
412__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
413{
414 return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
415}
416
417static inline bool
418__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
419{
420 return lck->lk.depth_locked != -1;
421}
422
423__forceinline static void
424__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
425{
426 kmp_int32 gtid_code = ( gtid + 1 ) << 1;
427
428 KMP_MB();
429
430#ifdef USE_LOCK_PROFILE
431 kmp_uint32 curr = TCR_4( lck->lk.poll );
432 if ( ( curr != 0 ) && ( curr != gtid_code ) )
433 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
434 /* else __kmp_printf( "." );*/
435#endif /* USE_LOCK_PROFILE */
436
437 KMP_FSYNC_PREPARE( lck );
438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
439 lck, lck->lk.poll, gtid ) );
440
441 kmp_int32 poll_val;
442 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
443 gtid_code ) ) != 0 ) {
444 kmp_int32 cond = poll_val & 1;
445 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
446 lck, gtid, poll_val, cond ) );
447
448 //
449 // NOTE: if you try to use the following condition for this branch
450 //
451 // if ( poll_val & 1 == 0 )
452 //
453 // Then the 12.0 compiler has a bug where the following block will
454 // always be skipped, regardless of the value of the LSB of poll_val.
455 //
456 if ( ! cond ) {
457 //
458 // Try to set the lsb in the poll to indicate to the owner
459 // thread that they need to wake this thread up.
460 //
461 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
462 poll_val, poll_val | 1 ) ) {
463 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
464 lck, lck->lk.poll, gtid ) );
465 continue;
466 }
467 poll_val |= 1;
468
469 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
470 lck, lck->lk.poll, gtid ) );
471 }
472
473 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
474 lck, gtid, poll_val ) );
475
476 kmp_int32 rc;
477 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
478 poll_val, NULL, NULL, 0 ) ) != 0 ) {
479 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
480 lck, gtid, poll_val, rc, errno ) );
481 continue;
482 }
483
484 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
485 lck, gtid, poll_val ) );
486 //
487        // This thread has now done a successful futex wait call and was
488        // entered on the OS futex queue.  We must now perform a futex
489 // wake call when releasing the lock, as we have no idea how many
490 // other threads are in the queue.
491 //
492 gtid_code |= 1;
493 }
494
495 KMP_FSYNC_ACQUIRED( lck );
496 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
497 lck, lck->lk.poll, gtid ) );
498}
499
500void
501__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502{
503 __kmp_acquire_futex_lock_timed_template( lck, gtid );
504}
505
506static void
507__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
508{
509 if ( __kmp_env_consistency_check ) {
510 char const * const func = "omp_set_lock";
511 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
512 && __kmp_is_futex_lock_nestable( lck ) ) {
513 KMP_FATAL( LockNestableUsedAsSimple, func );
514 }
515 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
516 KMP_FATAL( LockIsAlreadyOwned, func );
517 }
518 }
519 __kmp_acquire_futex_lock( lck, gtid );
520}
521
522int
523__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
524{
525 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
526 KMP_FSYNC_ACQUIRED( lck );
527 return TRUE;
528 }
529 return FALSE;
530}
531
532static int
533__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
534{
535 if ( __kmp_env_consistency_check ) {
536 char const * const func = "omp_test_lock";
537 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
538 && __kmp_is_futex_lock_nestable( lck ) ) {
539 KMP_FATAL( LockNestableUsedAsSimple, func );
540 }
541 }
542 return __kmp_test_futex_lock( lck, gtid );
543}
544
545void
546__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
547{
548 KMP_MB(); /* Flush all pending memory write invalidates. */
549
550 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
551 lck, lck->lk.poll, gtid ) );
552
553 KMP_FSYNC_RELEASING(lck);
554
555 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
556
557 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
558 lck, gtid, poll_val ) );
559
560 if ( poll_val & 1 ) {
561 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
562 lck, gtid ) );
563 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
564 }
565
566 KMP_MB(); /* Flush all pending memory write invalidates. */
567
568 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
569 lck, lck->lk.poll, gtid ) );
570
571 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
572 __kmp_xproc ) );
573}
574
575static void
576__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
577{
578 if ( __kmp_env_consistency_check ) {
579 char const * const func = "omp_unset_lock";
580 KMP_MB(); /* in case another processor initialized lock */
581 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
582 && __kmp_is_futex_lock_nestable( lck ) ) {
583 KMP_FATAL( LockNestableUsedAsSimple, func );
584 }
585 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
586 KMP_FATAL( LockUnsettingFree, func );
587 }
588 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
589 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
590 KMP_FATAL( LockUnsettingSetByAnother, func );
591 }
592 }
593 __kmp_release_futex_lock( lck, gtid );
594}
595
596void
597__kmp_init_futex_lock( kmp_futex_lock_t * lck )
598{
599 TCW_4( lck->lk.poll, 0 );
600}
601
602static void
603__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
604{
605 __kmp_init_futex_lock( lck );
606}
607
608void
609__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
610{
611 lck->lk.poll = 0;
612}
613
614static void
615__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
616{
617 if ( __kmp_env_consistency_check ) {
618 char const * const func = "omp_destroy_lock";
619 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
620 && __kmp_is_futex_lock_nestable( lck ) ) {
621 KMP_FATAL( LockNestableUsedAsSimple, func );
622 }
623 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
624 KMP_FATAL( LockStillOwned, func );
625 }
626 }
627 __kmp_destroy_futex_lock( lck );
628}
629
630
631//
632// nested futex locks
633//
634
635void
636__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
637{
638 KMP_DEBUG_ASSERT( gtid >= 0 );
639
640 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
641 lck->lk.depth_locked += 1;
642 }
643 else {
644 __kmp_acquire_futex_lock_timed_template( lck, gtid );
645 lck->lk.depth_locked = 1;
646 }
647}
648
649static void
650__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
651{
652 if ( __kmp_env_consistency_check ) {
653 char const * const func = "omp_set_nest_lock";
654 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
655 KMP_FATAL( LockSimpleUsedAsNestable, func );
656 }
657 }
658 __kmp_acquire_nested_futex_lock( lck, gtid );
659}
660
661int
662__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
663{
664 int retval;
665
666 KMP_DEBUG_ASSERT( gtid >= 0 );
667
668 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
669 retval = ++lck->lk.depth_locked;
670 }
671 else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
672 retval = 0;
673 }
674 else {
675 KMP_MB();
676 retval = lck->lk.depth_locked = 1;
677 }
678 return retval;
679}
680
681static int
682__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
683{
684 if ( __kmp_env_consistency_check ) {
685 char const * const func = "omp_test_nest_lock";
686 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
687 KMP_FATAL( LockSimpleUsedAsNestable, func );
688 }
689 }
690 return __kmp_test_nested_futex_lock( lck, gtid );
691}
692
693void
694__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
695{
696 KMP_DEBUG_ASSERT( gtid >= 0 );
697
698 KMP_MB();
699 if ( --(lck->lk.depth_locked) == 0 ) {
700 __kmp_release_futex_lock( lck, gtid );
701 }
702}
703
704static void
705__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
706{
707 if ( __kmp_env_consistency_check ) {
708 char const * const func = "omp_unset_nest_lock";
709 KMP_MB(); /* in case another processor initialized lock */
710 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
711 KMP_FATAL( LockSimpleUsedAsNestable, func );
712 }
713 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
714 KMP_FATAL( LockUnsettingFree, func );
715 }
716 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
717 KMP_FATAL( LockUnsettingSetByAnother, func );
718 }
719 }
720 __kmp_release_nested_futex_lock( lck, gtid );
721}
722
723void
724__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
725{
726 __kmp_init_futex_lock( lck );
727 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
728}
729
730static void
731__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
732{
733 __kmp_init_nested_futex_lock( lck );
734}
735
736void
737__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
738{
739 __kmp_destroy_futex_lock( lck );
740 lck->lk.depth_locked = 0;
741}
742
743static void
744__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
745{
746 if ( __kmp_env_consistency_check ) {
747 char const * const func = "omp_destroy_nest_lock";
748 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
749 KMP_FATAL( LockSimpleUsedAsNestable, func );
750 }
751 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
752 KMP_FATAL( LockStillOwned, func );
753 }
754 }
755 __kmp_destroy_nested_futex_lock( lck );
756}
757
758#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
759
760
761/* ------------------------------------------------------------------------ */
762/* ticket (bakery) locks */
763
764static kmp_int32
765__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
766{
767 return TCR_4( lck->lk.owner_id ) - 1;
768}
769
770static inline bool
771__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
772{
773 return lck->lk.depth_locked != -1;
774}
775
776static kmp_uint32
777__kmp_bakery_check(kmp_uint value, kmp_uint checker)
778{
779 register kmp_uint32 pause;
780
781 if (value == checker) {
782 return TRUE;
783 }
784 for (pause = checker - value; pause != 0; --pause) {
785 __kmp_static_delay(TRUE);
786 }
787 return FALSE;
788}
789
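//
// Each acquiring thread atomically claims a ticket from next_ticket and then
// spins (via __kmp_bakery_check) until now_serving reaches that ticket,
// which gives FIFO ordering among contending threads.
//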
790__forceinline static void
791__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
792{
793 kmp_uint32 my_ticket;
794 KMP_MB();
795
796 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
797
798#ifdef USE_LOCK_PROFILE
799 if ( TCR_4( lck->lk.now_serving ) != my_ticket )
800 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
801 /* else __kmp_printf( "." );*/
802#endif /* USE_LOCK_PROFILE */
803
804 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
805 KMP_FSYNC_ACQUIRED(lck);
806 return;
807 }
808 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
809 KMP_FSYNC_ACQUIRED(lck);
810}
811
812void
813__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814{
815 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
816}
817
818static void
819__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
820{
821 if ( __kmp_env_consistency_check ) {
822 char const * const func = "omp_set_lock";
823 if ( lck->lk.initialized != lck ) {
824 KMP_FATAL( LockIsUninitialized, func );
825 }
826 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
827 KMP_FATAL( LockNestableUsedAsSimple, func );
828 }
829 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
830 KMP_FATAL( LockIsAlreadyOwned, func );
831 }
832 }
833
834 __kmp_acquire_ticket_lock( lck, gtid );
835
836 if ( __kmp_env_consistency_check ) {
837 lck->lk.owner_id = gtid + 1;
838 }
839}
840
841int
842__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
843{
844 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
845 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
846 kmp_uint32 next_ticket = my_ticket + 1;
847 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
848 my_ticket, next_ticket ) ) {
849 KMP_FSYNC_ACQUIRED( lck );
850 return TRUE;
851 }
852 }
853 return FALSE;
854}
855
856static int
857__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
858{
859 if ( __kmp_env_consistency_check ) {
860 char const * const func = "omp_test_lock";
861 if ( lck->lk.initialized != lck ) {
862 KMP_FATAL( LockIsUninitialized, func );
863 }
864 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
865 KMP_FATAL( LockNestableUsedAsSimple, func );
866 }
867 }
868
869 int retval = __kmp_test_ticket_lock( lck, gtid );
870
871 if ( __kmp_env_consistency_check && retval ) {
872 lck->lk.owner_id = gtid + 1;
873 }
874 return retval;
875}
876
877void
878__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
879{
880 kmp_uint32 distance;
881
882 KMP_MB(); /* Flush all pending memory write invalidates. */
883
884 KMP_FSYNC_RELEASING(lck);
885 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
886
887 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
888
889 KMP_MB(); /* Flush all pending memory write invalidates. */
890
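    // Yield if the number of threads still waiting (distance) exceeds the
    // number of available processors.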
891 KMP_YIELD( distance
892 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
893}
894
895static void
896__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
897{
898 if ( __kmp_env_consistency_check ) {
899 char const * const func = "omp_unset_lock";
900 KMP_MB(); /* in case another processor initialized lock */
901 if ( lck->lk.initialized != lck ) {
902 KMP_FATAL( LockIsUninitialized, func );
903 }
904 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
905 KMP_FATAL( LockNestableUsedAsSimple, func );
906 }
907 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
908 KMP_FATAL( LockUnsettingFree, func );
909 }
910 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
911 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
912 KMP_FATAL( LockUnsettingSetByAnother, func );
913 }
914 lck->lk.owner_id = 0;
915 }
916 __kmp_release_ticket_lock( lck, gtid );
917}
918
919void
920__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
921{
922 lck->lk.location = NULL;
923 TCW_4( lck->lk.next_ticket, 0 );
924 TCW_4( lck->lk.now_serving, 0 );
925 lck->lk.owner_id = 0; // no thread owns the lock.
926 lck->lk.depth_locked = -1; // -1 => not a nested lock.
927 lck->lk.initialized = (kmp_ticket_lock *)lck;
928}
929
930static void
931__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
932{
933 __kmp_init_ticket_lock( lck );
934}
935
936void
937__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
938{
939 lck->lk.initialized = NULL;
940 lck->lk.location = NULL;
941 lck->lk.next_ticket = 0;
942 lck->lk.now_serving = 0;
943 lck->lk.owner_id = 0;
944 lck->lk.depth_locked = -1;
945}
946
947static void
948__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
949{
950 if ( __kmp_env_consistency_check ) {
951 char const * const func = "omp_destroy_lock";
952 if ( lck->lk.initialized != lck ) {
953 KMP_FATAL( LockIsUninitialized, func );
954 }
955 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
956 KMP_FATAL( LockNestableUsedAsSimple, func );
957 }
958 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
959 KMP_FATAL( LockStillOwned, func );
960 }
961 }
962 __kmp_destroy_ticket_lock( lck );
963}
964
965
966//
967// nested ticket locks
968//
969
970void
971__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
972{
973 KMP_DEBUG_ASSERT( gtid >= 0 );
974
975 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
976 lck->lk.depth_locked += 1;
977 }
978 else {
979 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
980 KMP_MB();
981 lck->lk.depth_locked = 1;
982 KMP_MB();
983 lck->lk.owner_id = gtid + 1;
984 }
985}
986
987static void
988__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
989{
990 if ( __kmp_env_consistency_check ) {
991 char const * const func = "omp_set_nest_lock";
992 if ( lck->lk.initialized != lck ) {
993 KMP_FATAL( LockIsUninitialized, func );
994 }
995 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
996 KMP_FATAL( LockSimpleUsedAsNestable, func );
997 }
998 }
999 __kmp_acquire_nested_ticket_lock( lck, gtid );
1000}
1001
1002int
1003__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1004{
1005 int retval;
1006
1007 KMP_DEBUG_ASSERT( gtid >= 0 );
1008
1009 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
1010 retval = ++lck->lk.depth_locked;
1011 }
1012 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
1013 retval = 0;
1014 }
1015 else {
1016 KMP_MB();
1017 retval = lck->lk.depth_locked = 1;
1018 KMP_MB();
1019 lck->lk.owner_id = gtid + 1;
1020 }
1021 return retval;
1022}
1023
1024static int
1025__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
1026 kmp_int32 gtid )
1027{
1028 if ( __kmp_env_consistency_check ) {
1029 char const * const func = "omp_test_nest_lock";
1030 if ( lck->lk.initialized != lck ) {
1031 KMP_FATAL( LockIsUninitialized, func );
1032 }
1033 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1034 KMP_FATAL( LockSimpleUsedAsNestable, func );
1035 }
1036 }
1037 return __kmp_test_nested_ticket_lock( lck, gtid );
1038}
1039
1040void
1041__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1042{
1043 KMP_DEBUG_ASSERT( gtid >= 0 );
1044
1045 KMP_MB();
1046 if ( --(lck->lk.depth_locked) == 0 ) {
1047 KMP_MB();
1048 lck->lk.owner_id = 0;
1049 __kmp_release_ticket_lock( lck, gtid );
1050 }
1051}
1052
1053static void
1054__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1055{
1056 if ( __kmp_env_consistency_check ) {
1057 char const * const func = "omp_unset_nest_lock";
1058 KMP_MB(); /* in case another processor initialized lock */
1059 if ( lck->lk.initialized != lck ) {
1060 KMP_FATAL( LockIsUninitialized, func );
1061 }
1062 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1063 KMP_FATAL( LockSimpleUsedAsNestable, func );
1064 }
1065 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1066 KMP_FATAL( LockUnsettingFree, func );
1067 }
1068 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1069 KMP_FATAL( LockUnsettingSetByAnother, func );
1070 }
1071 }
1072 __kmp_release_nested_ticket_lock( lck, gtid );
1073}
1074
1075void
1076__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1077{
1078 __kmp_init_ticket_lock( lck );
1079 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1080}
1081
1082static void
1083__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1084{
1085 __kmp_init_nested_ticket_lock( lck );
1086}
1087
1088void
1089__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1090{
1091 __kmp_destroy_ticket_lock( lck );
1092 lck->lk.depth_locked = 0;
1093}
1094
1095static void
1096__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1097{
1098 if ( __kmp_env_consistency_check ) {
1099 char const * const func = "omp_destroy_nest_lock";
1100 if ( lck->lk.initialized != lck ) {
1101 KMP_FATAL( LockIsUninitialized, func );
1102 }
1103 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1104 KMP_FATAL( LockSimpleUsedAsNestable, func );
1105 }
1106 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1107 KMP_FATAL( LockStillOwned, func );
1108 }
1109 }
1110 __kmp_destroy_nested_ticket_lock( lck );
1111}
1112
1113
1114//
1115// access functions to fields which don't exist for all lock kinds.
1116//
1117
1118static int
1119__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1120{
1121 return lck == lck->lk.initialized;
1122}
1123
1124static const ident_t *
1125__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1126{
1127 return lck->lk.location;
1128}
1129
1130static void
1131__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1132{
1133 lck->lk.location = loc;
1134}
1135
1136static kmp_lock_flags_t
1137__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1138{
1139 return lck->lk.flags;
1140}
1141
1142static void
1143__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1144{
1145 lck->lk.flags = flags;
1146}
1147
1148/* ------------------------------------------------------------------------ */
1149/* queuing locks */
1150
1151/*
1152 * First the states
1153 * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1154 * UINT_MAX or -1, 0 means lock is held, nobody on queue
1155 * h, h means lock is held or about to transition, 1 element on queue
1156 * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1157 *
1158 * Now the transitions
1159 * Acquire(0,0) = -1 ,0
1160 * Release(0,0) = Error
1161 * Acquire(-1,0) = h ,h h > 0
1162 * Release(-1,0) = 0 ,0
1163 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1164 * Release(h,h) = -1 ,0 h > 0
1165 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1166 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1167 *
1168 * And pictorially
1169 *
1170 *
1171 * +-----+
1172 * | 0, 0|------- release -------> Error
1173 * +-----+
1174 * | ^
1175 * acquire| |release
1176 * | |
1177 * | |
1178 * v |
1179 * +-----+
1180 * |-1, 0|
1181 * +-----+
1182 * | ^
1183 * acquire| |release
1184 * | |
1185 * | |
1186 * v |
1187 * +-----+
1188 * | h, h|
1189 * +-----+
1190 * | ^
1191 * acquire| |release
1192 * | |
1193 * | |
1194 * v |
1195 * +-----+
1196 * | h, t|----- acquire, release loopback ---+
1197 * +-----+ |
1198 * ^ |
1199 * | |
1200 * +------------------------------------+
1201 *
1202 */
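/*
 * head_id/tail_id store gtid + 1 of the first/last waiting thread; -1 in
 * head_id means the lock is held with an empty queue.  Each waiter spins on
 * its own th_spin_here flag until the releasing thread clears it.
 */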
1203
1204#ifdef DEBUG_QUEUING_LOCKS
1205
1206/* Stuff for circular trace buffer */
1207#define TRACE_BUF_ELE 1024
1208static char traces[TRACE_BUF_ELE][128] = { 0 };
1209static int tc = 0;
1210#define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1211#define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1212#define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1213
1214static void
1215__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1216 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1217{
1218 kmp_int32 t, i;
1219
1220 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1221
1222 i = tc % TRACE_BUF_ELE;
1223 __kmp_printf_no_lock( "%s\n", traces[i] );
1224 i = (i+1) % TRACE_BUF_ELE;
1225 while ( i != (tc % TRACE_BUF_ELE) ) {
1226 __kmp_printf_no_lock( "%s", traces[i] );
1227 i = (i+1) % TRACE_BUF_ELE;
1228 }
1229 __kmp_printf_no_lock( "\n" );
1230
1231 __kmp_printf_no_lock(
1232 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1233 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1234 head_id, tail_id );
1235
1236 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1237
1238 if ( lck->lk.head_id >= 1 ) {
1239 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1240 while (t > 0) {
1241 __kmp_printf_no_lock( "-> %d ", t );
1242 t = __kmp_threads[t-1]->th.th_next_waiting;
1243 }
1244 }
1245 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1246 __kmp_printf_no_lock( "\n\n" );
1247}
1248
1249#endif /* DEBUG_QUEUING_LOCKS */
1250
1251static kmp_int32
1252__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1253{
1254 return TCR_4( lck->lk.owner_id ) - 1;
1255}
1256
1257static inline bool
1258__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1259{
1260 return lck->lk.depth_locked != -1;
1261}
1262
1263/* Acquire a lock using the queuing lock implementation */
1264template <bool takeTime>
1265/* [TLW] The unused template above is left behind because of what BEB believes is a
1266 potential compiler problem with __forceinline. */
1267__forceinline static void
1268__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1269 kmp_int32 gtid )
1270{
1271 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1272 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1273 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1274 volatile kmp_uint32 *spin_here_p;
1275 kmp_int32 need_mf = 1;
1276
1277 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1278
1279 KMP_FSYNC_PREPARE( lck );
1280 KMP_DEBUG_ASSERT( this_thr != NULL );
1281 spin_here_p = & this_thr->th.th_spin_here;
1282
1283#ifdef DEBUG_QUEUING_LOCKS
1284 TRACE_LOCK( gtid+1, "acq ent" );
1285 if ( *spin_here_p )
1286 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1287 if ( this_thr->th.th_next_waiting != 0 )
1288 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1289#endif
1290 KMP_DEBUG_ASSERT( !*spin_here_p );
1291 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1292
1293
1294 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1295 that may follow, not just in execution order, but also in visibility order. This way,
1296 when a releasing thread observes the changes to the queue by this thread, it can
1297 rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1298 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1299 to FALSE before this thread sets it to TRUE, this thread will hang.
1300 */
1301 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1302
1303 while( 1 ) {
1304 kmp_int32 enqueued;
1305 kmp_int32 head;
1306 kmp_int32 tail;
1307
1308 head = *head_id_p;
1309
1310 switch ( head ) {
1311
1312 case -1:
1313 {
1314#ifdef DEBUG_QUEUING_LOCKS
1315 tail = *tail_id_p;
1316 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1317#endif
1318 tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1319 this assignment prevents us from entering the if ( t > 0 )
1320 condition in the enqueued case below, which is not necessary for
1321 this state transition */
1322
1323 need_mf = 0;
1324 /* try (-1,0)->(tid,tid) */
1325 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1326 KMP_PACK_64( -1, 0 ),
1327 KMP_PACK_64( gtid+1, gtid+1 ) );
1328#ifdef DEBUG_QUEUING_LOCKS
1329 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1330#endif
1331 }
1332 break;
1333
1334 default:
1335 {
1336 tail = *tail_id_p;
1337 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1338
1339#ifdef DEBUG_QUEUING_LOCKS
1340 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1341#endif
1342
1343 if ( tail == 0 ) {
1344 enqueued = FALSE;
1345 }
1346 else {
1347 need_mf = 0;
1348 /* try (h,t) or (h,h)->(h,tid) */
1349 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1350
1351#ifdef DEBUG_QUEUING_LOCKS
1352 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1353#endif
1354 }
1355 }
1356 break;
1357
1358 case 0: /* empty queue */
1359 {
1360 kmp_int32 grabbed_lock;
1361
1362#ifdef DEBUG_QUEUING_LOCKS
1363 tail = *tail_id_p;
1364 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1365#endif
1366 /* try (0,0)->(-1,0) */
1367
1368 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1369 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1370
1371 if ( grabbed_lock ) {
1372
1373 *spin_here_p = FALSE;
1374
1375 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1376 lck, gtid ));
1377#ifdef DEBUG_QUEUING_LOCKS
1378 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1379#endif
1380 KMP_FSYNC_ACQUIRED( lck );
1381 return; /* lock holder cannot be on queue */
1382 }
1383 enqueued = FALSE;
1384 }
1385 break;
1386 }
1387
1388 if ( enqueued ) {
1389 if ( tail > 0 ) {
1390 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1391 KMP_ASSERT( tail_thr != NULL );
1392 tail_thr->th.th_next_waiting = gtid+1;
1393 /* corresponding wait for this write in release code */
1394 }
1395 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1396
1397
1398 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1399 * throughput only here.
1400 */
1401 KMP_MB();
1402 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1403
1404#ifdef DEBUG_QUEUING_LOCKS
1405 TRACE_LOCK( gtid+1, "acq spin" );
1406
1407 if ( this_thr->th.th_next_waiting != 0 )
1408 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1409#endif
1410 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1411 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1412 lck, gtid ));
1413
1414#ifdef DEBUG_QUEUING_LOCKS
1415 TRACE_LOCK( gtid+1, "acq exit 2" );
1416#endif
1417 /* got lock, we were dequeued by the thread that released lock */
1418 return;
1419 }
1420
1421 /* Yield if number of threads > number of logical processors */
1422 /* ToDo: Not sure why this should only be in oversubscription case,
1423 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1424 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1425 __kmp_xproc ) );
1426#ifdef DEBUG_QUEUING_LOCKS
1427 TRACE_LOCK( gtid+1, "acq retry" );
1428#endif
1429
1430 }
1431 KMP_ASSERT2( 0, "should not get here" );
1432}
1433
1434void
1435__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1436{
1437 KMP_DEBUG_ASSERT( gtid >= 0 );
1438
1439 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1440}
1441
1442static void
1443__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1444 kmp_int32 gtid )
1445{
1446 if ( __kmp_env_consistency_check ) {
1447 char const * const func = "omp_set_lock";
1448 if ( lck->lk.initialized != lck ) {
1449 KMP_FATAL( LockIsUninitialized, func );
1450 }
1451 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1452 KMP_FATAL( LockNestableUsedAsSimple, func );
1453 }
1454 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1455 KMP_FATAL( LockIsAlreadyOwned, func );
1456 }
1457 }
1458
1459 __kmp_acquire_queuing_lock( lck, gtid );
1460
1461 if ( __kmp_env_consistency_check ) {
1462 lck->lk.owner_id = gtid + 1;
1463 }
1464}
1465
1466int
1467__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1468{
1469 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1470 kmp_int32 head;
1471#ifdef KMP_DEBUG
1472 kmp_info_t *this_thr;
1473#endif
1474
1475 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1476 KMP_DEBUG_ASSERT( gtid >= 0 );
1477#ifdef KMP_DEBUG
1478 this_thr = __kmp_thread_from_gtid( gtid );
1479 KMP_DEBUG_ASSERT( this_thr != NULL );
1480 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1481#endif
1482
1483 head = *head_id_p;
1484
1485 if ( head == 0 ) { /* nobody on queue, nobody holding */
1486
1487 /* try (0,0)->(-1,0) */
1488
1489 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1490 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1491 KMP_FSYNC_ACQUIRED(lck);
1492 return TRUE;
1493 }
1494 }
1495
1496 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1497 return FALSE;
1498}
1499
1500static int
1501__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1502{
1503 if ( __kmp_env_consistency_check ) {
1504 char const * const func = "omp_test_lock";
1505 if ( lck->lk.initialized != lck ) {
1506 KMP_FATAL( LockIsUninitialized, func );
1507 }
1508 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1509 KMP_FATAL( LockNestableUsedAsSimple, func );
1510 }
1511 }
1512
1513 int retval = __kmp_test_queuing_lock( lck, gtid );
1514
1515 if ( __kmp_env_consistency_check && retval ) {
1516 lck->lk.owner_id = gtid + 1;
1517 }
1518 return retval;
1519}
1520
1521void
1522__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1523{
1524 register kmp_info_t *this_thr;
1525 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1526 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1527
1528 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1529 KMP_DEBUG_ASSERT( gtid >= 0 );
1530 this_thr = __kmp_thread_from_gtid( gtid );
1531 KMP_DEBUG_ASSERT( this_thr != NULL );
1532#ifdef DEBUG_QUEUING_LOCKS
1533 TRACE_LOCK( gtid+1, "rel ent" );
1534
1535 if ( this_thr->th.th_spin_here )
1536 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1537 if ( this_thr->th.th_next_waiting != 0 )
1538 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1539#endif
1540 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1541 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1542
1543 KMP_FSYNC_RELEASING(lck);
1544
1545 while( 1 ) {
1546 kmp_int32 dequeued;
1547 kmp_int32 head;
1548 kmp_int32 tail;
1549
1550 head = *head_id_p;
1551
1552#ifdef DEBUG_QUEUING_LOCKS
1553 tail = *tail_id_p;
1554 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1555 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1556#endif
1557 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1558
1559 if ( head == -1 ) { /* nobody on queue */
1560
1561 /* try (-1,0)->(0,0) */
1562 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1563 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1564 lck, gtid ));
1565#ifdef DEBUG_QUEUING_LOCKS
1566 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1567#endif
1568 return;
1569 }
1570 dequeued = FALSE;
1571
1572 }
1573 else {
1574
1575 tail = *tail_id_p;
1576 if ( head == tail ) { /* only one thread on the queue */
1577
1578#ifdef DEBUG_QUEUING_LOCKS
1579 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580#endif
1581 KMP_DEBUG_ASSERT( head > 0 );
1582
1583 /* try (h,h)->(-1,0) */
1584 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1585 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1586#ifdef DEBUG_QUEUING_LOCKS
1587 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1588#endif
1589
1590 }
1591 else {
1592 volatile kmp_int32 *waiting_id_p;
1593 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1594 KMP_DEBUG_ASSERT( head_thr != NULL );
1595 waiting_id_p = & head_thr->th.th_next_waiting;
1596
1597 /* Does this require synchronous reads? */
1598#ifdef DEBUG_QUEUING_LOCKS
1599 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1600#endif
1601 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1602
1603 /* try (h,t)->(h',t) or (t,t) */
1604
1605 KMP_MB();
1606 /* make sure enqueuing thread has time to update next waiting thread field */
1607 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1608#ifdef DEBUG_QUEUING_LOCKS
1609 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1610#endif
1611 dequeued = TRUE;
1612 }
1613 }
1614
1615 if ( dequeued ) {
1616 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617 KMP_DEBUG_ASSERT( head_thr != NULL );
1618
1619 /* Does this require synchronous reads? */
1620#ifdef DEBUG_QUEUING_LOCKS
1621 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1622#endif
1623 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1624
1625            /* For clean code only.
1626             * The thread is not released until the next statement, which
1627             * prevents a race with the acquire code. */
1628 head_thr->th.th_next_waiting = 0;
1629#ifdef DEBUG_QUEUING_LOCKS
1630 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1631#endif
1632
1633 KMP_MB();
1634 /* reset spin value */
1635 head_thr->th.th_spin_here = FALSE;
1636
1637 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1638 lck, gtid ));
1639#ifdef DEBUG_QUEUING_LOCKS
1640 TRACE_LOCK( gtid+1, "rel exit 2" );
1641#endif
1642 return;
1643 }
1644 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1645
1646#ifdef DEBUG_QUEUING_LOCKS
1647 TRACE_LOCK( gtid+1, "rel retry" );
1648#endif
1649
1650 } /* while */
1651 KMP_ASSERT2( 0, "should not get here" );
1652}
1653
1654static void
1655__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1656 kmp_int32 gtid )
1657{
1658 if ( __kmp_env_consistency_check ) {
1659 char const * const func = "omp_unset_lock";
1660 KMP_MB(); /* in case another processor initialized lock */
1661 if ( lck->lk.initialized != lck ) {
1662 KMP_FATAL( LockIsUninitialized, func );
1663 }
1664 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1665 KMP_FATAL( LockNestableUsedAsSimple, func );
1666 }
1667 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1668 KMP_FATAL( LockUnsettingFree, func );
1669 }
1670 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1671 KMP_FATAL( LockUnsettingSetByAnother, func );
1672 }
1673 lck->lk.owner_id = 0;
1674 }
1675 __kmp_release_queuing_lock( lck, gtid );
1676}
1677
1678void
1679__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1680{
1681 lck->lk.location = NULL;
1682 lck->lk.head_id = 0;
1683 lck->lk.tail_id = 0;
1684 lck->lk.next_ticket = 0;
1685 lck->lk.now_serving = 0;
1686 lck->lk.owner_id = 0; // no thread owns the lock.
1687 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1688 lck->lk.initialized = lck;
1689
1690 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1691}
1692
1693static void
1694__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1695{
1696 __kmp_init_queuing_lock( lck );
1697}
1698
1699void
1700__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1701{
1702 lck->lk.initialized = NULL;
1703 lck->lk.location = NULL;
1704 lck->lk.head_id = 0;
1705 lck->lk.tail_id = 0;
1706 lck->lk.next_ticket = 0;
1707 lck->lk.now_serving = 0;
1708 lck->lk.owner_id = 0;
1709 lck->lk.depth_locked = -1;
1710}
1711
1712static void
1713__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1714{
1715 if ( __kmp_env_consistency_check ) {
1716 char const * const func = "omp_destroy_lock";
1717 if ( lck->lk.initialized != lck ) {
1718 KMP_FATAL( LockIsUninitialized, func );
1719 }
1720 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1721 KMP_FATAL( LockNestableUsedAsSimple, func );
1722 }
1723 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1724 KMP_FATAL( LockStillOwned, func );
1725 }
1726 }
1727 __kmp_destroy_queuing_lock( lck );
1728}
1729
1730
1731//
1732// nested queuing locks
1733//
1734
1735void
1736__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1737{
1738 KMP_DEBUG_ASSERT( gtid >= 0 );
1739
1740 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1741 lck->lk.depth_locked += 1;
1742 }
1743 else {
1744 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1745 KMP_MB();
1746 lck->lk.depth_locked = 1;
1747 KMP_MB();
1748 lck->lk.owner_id = gtid + 1;
1749 }
1750}
1751
1752static void
1753__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1754{
1755 if ( __kmp_env_consistency_check ) {
1756 char const * const func = "omp_set_nest_lock";
1757 if ( lck->lk.initialized != lck ) {
1758 KMP_FATAL( LockIsUninitialized, func );
1759 }
1760 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1761 KMP_FATAL( LockSimpleUsedAsNestable, func );
1762 }
1763 }
1764 __kmp_acquire_nested_queuing_lock( lck, gtid );
1765}
1766
1767int
1768__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1769{
1770 int retval;
1771
1772 KMP_DEBUG_ASSERT( gtid >= 0 );
1773
1774 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1775 retval = ++lck->lk.depth_locked;
1776 }
1777 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1778 retval = 0;
1779 }
1780 else {
1781 KMP_MB();
1782 retval = lck->lk.depth_locked = 1;
1783 KMP_MB();
1784 lck->lk.owner_id = gtid + 1;
1785 }
1786 return retval;
1787}
1788
1789static int
1790__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1791 kmp_int32 gtid )
1792{
1793 if ( __kmp_env_consistency_check ) {
1794 char const * const func = "omp_test_nest_lock";
1795 if ( lck->lk.initialized != lck ) {
1796 KMP_FATAL( LockIsUninitialized, func );
1797 }
1798 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1799 KMP_FATAL( LockSimpleUsedAsNestable, func );
1800 }
1801 }
1802 return __kmp_test_nested_queuing_lock( lck, gtid );
1803}
1804
1805void
1806__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1807{
1808 KMP_DEBUG_ASSERT( gtid >= 0 );
1809
1810 KMP_MB();
1811 if ( --(lck->lk.depth_locked) == 0 ) {
1812 KMP_MB();
1813 lck->lk.owner_id = 0;
1814 __kmp_release_queuing_lock( lck, gtid );
1815 }
1816}
1817
1818static void
1819__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1820{
1821 if ( __kmp_env_consistency_check ) {
1822 char const * const func = "omp_unset_nest_lock";
1823 KMP_MB(); /* in case another processor initialized lock */
1824 if ( lck->lk.initialized != lck ) {
1825 KMP_FATAL( LockIsUninitialized, func );
1826 }
1827 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1828 KMP_FATAL( LockSimpleUsedAsNestable, func );
1829 }
1830 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1831 KMP_FATAL( LockUnsettingFree, func );
1832 }
1833 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1834 KMP_FATAL( LockUnsettingSetByAnother, func );
1835 }
1836 }
1837 __kmp_release_nested_queuing_lock( lck, gtid );
1838}
1839
1840void
1841__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1842{
1843 __kmp_init_queuing_lock( lck );
1844 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1845}
1846
1847static void
1848__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1849{
1850 __kmp_init_nested_queuing_lock( lck );
1851}
1852
1853void
1854__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1855{
1856 __kmp_destroy_queuing_lock( lck );
1857 lck->lk.depth_locked = 0;
1858}
1859
1860static void
1861__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1862{
1863 if ( __kmp_env_consistency_check ) {
1864 char const * const func = "omp_destroy_nest_lock";
1865 if ( lck->lk.initialized != lck ) {
1866 KMP_FATAL( LockIsUninitialized, func );
1867 }
1868 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1869 KMP_FATAL( LockSimpleUsedAsNestable, func );
1870 }
1871 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1872 KMP_FATAL( LockStillOwned, func );
1873 }
1874 }
1875 __kmp_destroy_nested_queuing_lock( lck );
1876}
1877
1878
1879//
1880// access functions to fields which don't exist for all lock kinds.
1881//
1882
1883static int
1884__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1885{
1886 return lck == lck->lk.initialized;
1887}
1888
1889static const ident_t *
1890__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1891{
1892 return lck->lk.location;
1893}
1894
1895static void
1896__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1897{
1898 lck->lk.location = loc;
1899}
1900
1901static kmp_lock_flags_t
1902__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1903{
1904 return lck->lk.flags;
1905}
1906
1907static void
1908__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1909{
1910 lck->lk.flags = flags;
1911}
1912
1913#if KMP_USE_ADAPTIVE_LOCKS
1914
1915/*
1916 RTM Adaptive locks
1917*/
1918
1919// TODO: Use the header for intrinsics below with the compiler 13.0
1920//#include <immintrin.h>
1921
1922// Values from the status register after failed speculation.
1923#define _XBEGIN_STARTED (~0u)
1924#define _XABORT_EXPLICIT (1 << 0)
1925#define _XABORT_RETRY (1 << 1)
1926#define _XABORT_CONFLICT (1 << 2)
1927#define _XABORT_CAPACITY (1 << 3)
1928#define _XABORT_DEBUG (1 << 4)
1929#define _XABORT_NESTED (1 << 5)
1930#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1931
1932// Aborts for which it's worth trying again immediately
1933#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1934
1935#define STRINGIZE_INTERNAL(arg) #arg
1936#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1937
1938// Access to RTM instructions
1939
1940/*
1941 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1942 This is the same definition as the compiler intrinsic that will be supported at some point.
1943*/
1944static __inline int _xbegin()
1945{
1946 int res = -1;
1947
1948#if KMP_OS_WINDOWS
1949#if KMP_ARCH_X86_64
1950 _asm {
1951 _emit 0xC7
1952 _emit 0xF8
1953 _emit 2
1954 _emit 0
1955 _emit 0
1956 _emit 0
1957 jmp L2
1958 mov res, eax
1959 L2:
1960 }
1961#else /* IA32 */
1962 _asm {
1963 _emit 0xC7
1964 _emit 0xF8
1965 _emit 2
1966 _emit 0
1967 _emit 0
1968 _emit 0
1969 jmp L2
1970 mov res, eax
1971 L2:
1972 }
1973#endif // KMP_ARCH_X86_64
1974#else
1975 /* Note that %eax must be noted as killed (clobbered), because
1976 * the XSR is returned in %eax(%rax) on abort. Other register
1977 * values are restored, so don't need to be killed.
1978 *
1979 * We must also mark 'res' as an input and an output, since otherwise
1980 * 'res=-1' may be dropped as being dead, whereas we do need the
1981 * assignment on the successful (i.e., non-abort) path.
1982 */
1983 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1984 " .long 1f-1b-6\n"
1985 " jmp 2f\n"
1986 "1: movl %%eax,%0\n"
1987 "2:"
1988 :"+r"(res)::"memory","%eax");
1989#endif // KMP_OS_WINDOWS
1990 return res;
1991}
1992
1993/*
1994 Transaction end
1995*/
1996static __inline void _xend()
1997{
1998#if KMP_OS_WINDOWS
1999 __asm {
2000 _emit 0x0f
2001 _emit 0x01
2002 _emit 0xd5
2003 }
2004#else
2005 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2006#endif
2007}
2008
2009/*
2010 This is a macro, the argument must be a single byte constant which
2011 can be evaluated by the inline assembler, since it is emitted as a
2012 byte into the assembly code.
2013*/
2014#if KMP_OS_WINDOWS
2015#define _xabort(ARG) \
2016 _asm _emit 0xc6 \
2017 _asm _emit 0xf8 \
2018 _asm _emit ARG
2019#else
2020#define _xabort(ARG) \
2021 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2022#endif
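2023
// A minimal usage sketch of these three primitives (illustration only;
// compare __kmp_test_adaptive_lock_only() below). If _xbegin() returns
// _XBEGIN_STARTED the transaction is open and must end with _xend(); any
// abort (explicit or hardware-induced) rolls everything back and re-enters
// at the _xbegin() call with the abort status as the return value.
//
//     kmp_uint32 status = _xbegin();
//     if ( status == _XBEGIN_STARTED ) {
//         // ... speculative work; keep the transaction open ...
//         if ( must_give_up )          // 'must_give_up' is hypothetical
//             _xabort(0x01)            // rolls back; execution resumes at _xbegin
//         _xend();                     // commits the transaction
//     } else {
//         // 'status' holds the abort cause; for explicit aborts,
//         // _XABORT_CODE(status) recovers the byte passed to _xabort.
//     }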
2023
2024//
2025// Statistics are collected for testing purposes
2026//
2027#if KMP_DEBUG_ADAPTIVE_LOCKS
2028
2029// We accumulate speculative lock statistics when the lock is destroyed.
2030// We keep locks that haven't been destroyed in the liveLocks list
2031// so that we can grab their statistics too.
2032static kmp_adaptive_lock_statistics_t destroyedStats;
2033
2034// To hold the list of live locks.
2035static kmp_adaptive_lock_t liveLocks;
2036
2037// A lock so we can safely update the list of locks.
2038static kmp_bootstrap_lock_t chain_lock;
2039
2040// Initialize the list of stats.
2041void
2042__kmp_init_speculative_stats()
2043{
2044 kmp_adaptive_lock *lck = &liveLocks;
2045
2046 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2047 lck->stats.next = lck;
2048 lck->stats.prev = lck;
2049
2050 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052
2053 __kmp_init_bootstrap_lock( &chain_lock );
2054
2055}
2056
2057// Insert the lock into the circular list
2058static void
2059__kmp_remember_lock( kmp_adaptive_lock * lck )
2060{
2061 __kmp_acquire_bootstrap_lock( &chain_lock );
2062
2063 lck->stats.next = liveLocks.stats.next;
2064 lck->stats.prev = &liveLocks;
2065
2066 liveLocks.stats.next = lck;
2067 lck->stats.next->stats.prev = lck;
2068
2069 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071
2072 __kmp_release_bootstrap_lock( &chain_lock );
2073}
2074
2075static void
2076__kmp_forget_lock( kmp_adaptive_lock * lck )
2077{
2078 KMP_ASSERT( lck->stats.next->stats.prev == lck );
2079 KMP_ASSERT( lck->stats.prev->stats.next == lck );
2080
2081 kmp_adaptive_lock * n = lck->stats.next;
2082 kmp_adaptive_lock * p = lck->stats.prev;
2083
2084 n->stats.prev = p;
2085 p->stats.next = n;
2086}
2087
2088static void
2089__kmp_zero_speculative_stats( kmp_adaptive_lock * lck )
2090{
2091 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2092 __kmp_remember_lock( lck );
2093}
2094
2095static void
2096__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_t * lck )
2097{
2098 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2099
2100 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2101 t->successfulSpeculations += s->successfulSpeculations;
2102 t->hardFailedSpeculations += s->hardFailedSpeculations;
2103 t->softFailedSpeculations += s->softFailedSpeculations;
2104 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2105 t->lemmingYields += s->lemmingYields;
2106}
2107
2108static void
2109__kmp_accumulate_speculative_stats( kmp_adaptive_lock * lck)
2110{
2111 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2112
2113 __kmp_acquire_bootstrap_lock( &chain_lock );
2114
2115 __kmp_add_stats( &destroyedStats, lck );
2116 __kmp_forget_lock( lck );
2117
2118 __kmp_release_bootstrap_lock( &chain_lock );
2119}
2120
2121static float
2122percent (kmp_uint32 count, kmp_uint32 total)
2123{
2124 return (total == 0) ? 0.0: (100.0 * count)/total;
2125}
2126
2127static
2128FILE * __kmp_open_stats_file()
2129{
2130 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2131 return stdout;
2132
2133 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2134 char buffer[buffLen];
2135 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, getpid());
2136 FILE * result = fopen(&buffer[0], "w");
2137
2138 // Maybe we should issue a warning here...
2139 return result ? result : stdout;
2140}
2141
2142void
2143__kmp_print_speculative_stats()
2144{
2145 if (__kmp_user_lock_kind != lk_adaptive)
2146 return;
2147
2148 FILE * statsFile = __kmp_open_stats_file();
2149
2150 kmp_adaptive_lock_statistics_t total = destroyedStats;
2151 kmp_adaptive_lock *lck;
2152
2153 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2154 __kmp_add_stats( &total, lck );
2155 }
2156 kmp_adaptive_lock_statistics_t *t = &total;
2157 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2158 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2159 t->softFailedSpeculations;
2160
2161 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2162 fprintf ( statsFile, " Lock parameters: \n"
2163 " max_soft_retries : %10d\n"
2164 " max_badness : %10d\n",
2165 __kmp_adaptive_backoff_params.max_soft_retries,
2166 __kmp_adaptive_backoff_params.max_badness);
2167 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2168 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2169 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2170 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2171 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2172 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2173 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2174
2175 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2176 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2177 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2178 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2179 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2180 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2181 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2182
2183 if (statsFile != stdout)
2184 fclose( statsFile );
2185}
2186
2187# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2188#else
2189# define KMP_INC_STAT(lck,stat)
2190
2191#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2192
2193static inline bool
2194__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2195{
2196 // It is enough to check that the head_id is zero.
2197 // We don't also need to check the tail.
2198 bool res = lck->lk.head_id == 0;
2199
2200 // We need a fence here, since we must ensure that no memory operations
2201 // from later in this thread float above that read.
2202#if KMP_COMPILER_ICC
2203    _mm_mfence();
2204#else
2205 __sync_synchronize();
2206#endif
2207
2208 return res;
2209}
2210
2211// Functions for manipulating the badness
2212static __inline void
2213__kmp_update_badness_after_success( kmp_queuing_lock_t *lck )
2214{
2215 // Reset the badness to zero so we eagerly try to speculate again
2216 lck->lk.adaptive.badness = 0;
2217 KMP_INC_STAT(lck,successfulSpeculations);
2218}
2219
2220// Create a bit mask with one more set bit.
2221static __inline void
2222__kmp_step_badness( kmp_queuing_lock_t *lck )
2223{
2224 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2225 if ( newBadness > lck->lk.adaptive.max_badness) {
2226 return;
2227 } else {
2228 lck->lk.adaptive.badness = newBadness;
2229 }
2230}
2231
2232// Check whether speculation should be attempted.
2233static __inline int
2234__kmp_should_speculate( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2235{
2236 kmp_uint32 badness = lck->lk.adaptive.badness;
2237 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2238 int res = (attempts & badness) == 0;
2239 return res;
2240}
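
// Worked example (sketch): __kmp_step_badness() grows badness through the
// mask sequence 0, 1, 3, 7, ... (capped at max_badness), and
// __kmp_should_speculate() tests (attempts & badness) == 0. So with
// badness == 0 every acquire attempt speculates, with badness == 3 only
// every 4th attempt does, and with badness == 7 only every 8th. A successful
// speculation resets badness to 0, so speculation backs off exponentially
// under contention and recovers immediately once it starts working again.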
2241
2242// Attempt to acquire only the speculative lock.
2243// Does not back off to the non-speculative lock.
2244//
2245static int
2246__kmp_test_adaptive_lock_only( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2247{
2248 int retries = lck->lk.adaptive.max_soft_retries;
2249
2250    // We don't explicitly count the start of speculation; rather, we record
2251    // the results (success, hard fail, soft fail). The sum of all of those
2252    // is the total number of times we started speculation, since every
2253    // speculation must end in one of those ways.
2254 do
2255 {
2256 kmp_uint32 status = _xbegin();
2257 // Switch this in to disable actual speculation but exercise
2258 // at least some of the rest of the code. Useful for debugging...
2259 // kmp_uint32 status = _XABORT_NESTED;
2260
2261 if (status == _XBEGIN_STARTED )
2262 { /* We have successfully started speculation
2263 * Check that no-one acquired the lock for real between when we last looked
2264 * and now. This also gets the lock cache line into our read-set,
2265 * which we need so that we'll abort if anyone later claims it for real.
2266 */
2267 if (! __kmp_is_unlocked_queuing_lock( lck ) )
2268 {
2269 // Lock is now visibly acquired, so someone beat us to it.
2270 // Abort the transaction so we'll restart from _xbegin with the
2271 // failure status.
2272 _xabort(0x01)
2273 KMP_ASSERT2( 0, "should not get here" );
2274 }
2275 return 1; // Lock has been acquired (speculatively)
2276 } else {
2277 // We have aborted, update the statistics
2278 if ( status & SOFT_ABORT_MASK)
2279 {
2280 KMP_INC_STAT(lck,softFailedSpeculations);
2281 // and loop round to retry.
2282 }
2283 else
2284 {
2285 KMP_INC_STAT(lck,hardFailedSpeculations);
2286 // Give up if we had a hard failure.
2287 break;
2288 }
2289 }
2290 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2291
2292 // Either we had a hard failure or we didn't succeed softly after
2293 // the full set of attempts, so back off the badness.
2294 __kmp_step_badness( lck );
2295 return 0;
2296}
2297
2298// Attempt to acquire the speculative lock, or back off to the non-speculative one
2299// if the speculative lock cannot be acquired.
2300// We can succeed speculatively, non-speculatively, or fail.
2301static int
2302__kmp_test_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2303{
2304 // First try to acquire the lock speculatively
2305 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2306 return 1;
2307
2308 // Speculative acquisition failed, so try to acquire it non-speculatively.
2309 // Count the non-speculative acquire attempt
2310 lck->lk.adaptive.acquire_attempts++;
2311
2312 // Use base, non-speculative lock.
2313 if ( __kmp_test_queuing_lock( lck, gtid ) )
2314 {
2315 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2316 return 1; // Lock is acquired (non-speculatively)
2317 }
2318 else
2319 {
2320 return 0; // Failed to acquire the lock, it's already visibly locked.
2321 }
2322}
2323
2324static int
2325__kmp_test_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2326{
2327 if ( __kmp_env_consistency_check ) {
2328 char const * const func = "omp_test_lock";
2329 if ( lck->lk.initialized != lck ) {
2330 KMP_FATAL( LockIsUninitialized, func );
2331 }
2332 }
2333
2334 int retval = __kmp_test_adaptive_lock( lck, gtid );
2335
2336 if ( __kmp_env_consistency_check && retval ) {
2337 lck->lk.owner_id = gtid + 1;
2338 }
2339 return retval;
2340}
2341
2342// Block until we can acquire a speculative, adaptive lock.
2343// We check whether we should be trying to speculate.
2344// If we should be, we check the real lock to see if it is free,
2345// and, if not, pause without attempting to acquire it until it is.
2346// Then we try the speculative acquire.
2347// This means that although we suffer from lemmings a little (
2348// because we can't acquire the lock speculatively until
2349// the queue of threads waiting has cleared), we don't get into a
2350// state where we can never acquire the lock speculatively (because we
2351// force the queue to clear by preventing new arrivals from entering the
2352// queue).
2353// This does mean that when we're trying to break lemmings, the lock
2354// is no longer fair. However, OpenMP makes no guarantee that its
2355// locks are fair, so this isn't a real problem.
2356static void
2357__kmp_acquire_adaptive_lock( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2358{
2359 if ( __kmp_should_speculate( lck, gtid ) )
2360 {
2361 if ( __kmp_is_unlocked_queuing_lock( lck ) )
2362 {
2363 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2364 return;
2365 // We tried speculation and failed, so give up.
2366 }
2367 else
2368 {
2369 // We can't try speculation until the lock is free, so we
2370            // pause here (without suspending on the queuing lock)
2371            // to allow it to drain, then try again.
2372 // All other threads will also see the same result for
2373 // shouldSpeculate, so will be doing the same if they
2374 // try to claim the lock from now on.
2375 while ( ! __kmp_is_unlocked_queuing_lock( lck ) )
2376 {
2377 KMP_INC_STAT(lck,lemmingYields);
2378 __kmp_yield (TRUE);
2379 }
2380
2381 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2382 return;
2383 }
2384 }
2385
2386 // Speculative acquisition failed, so acquire it non-speculatively.
2387 // Count the non-speculative acquire attempt
2388 lck->lk.adaptive.acquire_attempts++;
2389
2390 __kmp_acquire_queuing_lock_timed_template<FALSE>( lck, gtid );
2391 // We have acquired the base lock, so count that.
2392 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2393}
2394
2395static void
2396__kmp_acquire_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2397{
2398 if ( __kmp_env_consistency_check ) {
2399 char const * const func = "omp_set_lock";
2400 if ( lck->lk.initialized != lck ) {
2401 KMP_FATAL( LockIsUninitialized, func );
2402 }
2403 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
2404 KMP_FATAL( LockIsAlreadyOwned, func );
2405 }
2406 }
2407
2408 __kmp_acquire_adaptive_lock( lck, gtid );
2409
2410 if ( __kmp_env_consistency_check ) {
2411 lck->lk.owner_id = gtid + 1;
2412 }
2413}
2414
2415static void
2416__kmp_release_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2417{
2418 if ( __kmp_is_unlocked_queuing_lock( lck ) )
2419 { // If the lock doesn't look claimed we must be speculating.
2420 // (Or the user's code is buggy and they're releasing without locking;
2421 // if we had XTEST we'd be able to check that case...)
2422 _xend(); // Exit speculation
2423 __kmp_update_badness_after_success( lck );
2424 }
2425 else
2426 { // Since the lock *is* visibly locked we're not speculating,
2427 // so should use the underlying lock's release scheme.
2428 __kmp_release_queuing_lock( lck, gtid );
2429 }
2430}
2431
2432static void
2433__kmp_release_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2434{
2435 if ( __kmp_env_consistency_check ) {
2436 char const * const func = "omp_unset_lock";
2437 KMP_MB(); /* in case another processor initialized lock */
2438 if ( lck->lk.initialized != lck ) {
2439 KMP_FATAL( LockIsUninitialized, func );
2440 }
2441 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
2442 KMP_FATAL( LockUnsettingFree, func );
2443 }
2444 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
2445 KMP_FATAL( LockUnsettingSetByAnother, func );
2446 }
2447 lck->lk.owner_id = 0;
2448 }
2449 __kmp_release_adaptive_lock( lck, gtid );
2450}
2451
2452static void
2453__kmp_init_adaptive_lock( kmp_queuing_lock_t *lck )
2454{
2455 __kmp_init_queuing_lock( lck );
2456 lck->lk.adaptive.badness = 0;
2457 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2458 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2459 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2460#if KMP_DEBUG_ADAPTIVE_LOCKS
2461 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2462#endif
2463 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2464}
2465
2466static void
2467__kmp_init_adaptive_lock_with_checks( kmp_queuing_lock_t * lck )
2468{
2469 __kmp_init_adaptive_lock( lck );
2470}
2471
2472static void
2473__kmp_destroy_adaptive_lock( kmp_queuing_lock_t *lck )
2474{
2475#if KMP_DEBUG_ADAPTIVE_LOCKS
2476 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2477#endif
2478 __kmp_destroy_queuing_lock (lck);
2479 // Nothing needed for the speculative part.
2480}
2481
2482static void
2483__kmp_destroy_adaptive_lock_with_checks( kmp_queuing_lock_t *lck )
2484{
2485 if ( __kmp_env_consistency_check ) {
2486 char const * const func = "omp_destroy_lock";
2487 if ( lck->lk.initialized != lck ) {
2488 KMP_FATAL( LockIsUninitialized, func );
2489 }
2490 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
2491 KMP_FATAL( LockStillOwned, func );
2492 }
2493 }
2494 __kmp_destroy_adaptive_lock( lck );
2495}
2496
2497
2498#endif // KMP_USE_ADAPTIVE_LOCKS
2499
2500
2501/* ------------------------------------------------------------------------ */
2502/* DRDPA ticket locks */
2503/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2504
2505static kmp_int32
2506__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2507{
2508 return TCR_4( lck->lk.owner_id ) - 1;
2509}
2510
2511static inline bool
2512__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2513{
2514 return lck->lk.depth_locked != -1;
2515}
2516
2517__forceinline static void
2518__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2519{
2520 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2521 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2522 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2523 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2524 TCR_PTR(lck->lk.polls); // volatile load
2525
2526#ifdef USE_LOCK_PROFILE
2527 if (TCR_8(polls[ticket & mask].poll) != ticket)
2528 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2529 /* else __kmp_printf( "." );*/
2530#endif /* USE_LOCK_PROFILE */
2531
2532 //
2533 // Now spin-wait, but reload the polls pointer and mask, in case the
2534 // polling area has been reconfigured. Unless it is reconfigured, the
2535 // reloads stay in L1 cache and are cheap.
2536 //
2537 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2538 //
2539 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2540 // and poll to be re-read every spin iteration.
2541 //
2542 kmp_uint32 spins;
2543
2544 KMP_FSYNC_PREPARE(lck);
2545 KMP_INIT_YIELD(spins);
2546 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2547 __kmp_static_delay(TRUE);
2548
2549 //
2550 // If we are oversubscribed,
2551        // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2552        // CPU Pause is in the macros for yield.
2553 //
2554 KMP_YIELD(TCR_4(__kmp_nth)
2555 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2556 KMP_YIELD_SPIN(spins);
2557
2558 // Re-read the mask and the poll pointer from the lock structure.
2559 //
2560 // Make certain that "mask" is read before "polls" !!!
2561 //
2562        // If another thread reconfigures the polling area and updates
2563        // these values, and we get the new value of mask and the old polls
2564 // pointer, we could access memory beyond the end of the old polling
2565 // area.
2566 //
2567 mask = TCR_8(lck->lk.mask); // volatile load
2568 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2569 TCR_PTR(lck->lk.polls); // volatile load
2570 }
2571
2572 //
2573 // Critical section starts here
2574 //
2575 KMP_FSYNC_ACQUIRED(lck);
2576 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2577 ticket, lck));
2578 lck->lk.now_serving = ticket; // non-volatile store
2579
2580 //
2581 // Deallocate a garbage polling area if we know that we are the last
2582 // thread that could possibly access it.
2583 //
2584 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2585 // ticket.
2586 //
2587 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2588 __kmp_free((void *)lck->lk.old_polls);
2589 lck->lk.old_polls = NULL;
2590 lck->lk.cleanup_ticket = 0;
2591 }
2592
2593 //
2594 // Check to see if we should reconfigure the polling area.
2595 // If there is still a garbage polling area to be deallocated from a
2596 // previous reconfiguration, let a later thread reconfigure it.
2597 //
2598 if (lck->lk.old_polls == NULL) {
2599 bool reconfigure = false;
2600 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2601 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2602
2603 if (TCR_4(__kmp_nth)
2604 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2605 //
2606 // We are in oversubscription mode. Contract the polling area
2607 // down to a single location, if that hasn't been done already.
2608 //
2609 if (num_polls > 1) {
2610 reconfigure = true;
2611 num_polls = TCR_4(lck->lk.num_polls);
2612 mask = 0;
2613 num_polls = 1;
2614 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2615 __kmp_allocate(num_polls * sizeof(*polls));
2616 polls[0].poll = ticket;
2617 }
2618 }
2619 else {
2620 //
2621 // We are in under/fully subscribed mode. Check the number of
2622 // threads waiting on the lock. The size of the polling area
2623 // should be at least the number of threads waiting.
2624 //
2625 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2626 if (num_waiting > num_polls) {
2627 kmp_uint32 old_num_polls = num_polls;
2628 reconfigure = true;
2629 do {
2630 mask = (mask << 1) | 1;
2631 num_polls *= 2;
2632 } while (num_polls <= num_waiting);
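            //
            // Worked example (sketch): starting from num_polls == 1 and
            // mask == 0 with num_waiting == 5, the loop produces
            // num_polls = 2, 4, 8 and mask == 0x7, so the tickets in flight
            // map to distinct polling slots via (ticket & mask).
            //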
2633
2634 //
2635 // Allocate the new polling area, and copy the relevant portion
2636 // of the old polling area to the new area. __kmp_allocate()
2637 // zeroes the memory it allocates, and most of the old area is
2638 // just zero padding, so we only copy the release counters.
2639 //
2640 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2641 __kmp_allocate(num_polls * sizeof(*polls));
2642 kmp_uint32 i;
2643 for (i = 0; i < old_num_polls; i++) {
2644 polls[i].poll = old_polls[i].poll;
2645 }
2646 }
2647 }
2648
2649 if (reconfigure) {
2650 //
2651 // Now write the updated fields back to the lock structure.
2652 //
2653 // Make certain that "polls" is written before "mask" !!!
2654 //
2655 // If another thread picks up the new value of mask and the old
2656            // polls pointer, it could access memory beyond the end of the
2657 // old polling area.
2658 //
2659 // On x86, we need memory fences.
2660 //
2661 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2662 ticket, lck, num_polls));
2663
2664 lck->lk.old_polls = old_polls; // non-volatile store
2665 lck->lk.polls = polls; // volatile store
2666
2667 KMP_MB();
2668
2669 lck->lk.num_polls = num_polls; // non-volatile store
2670 lck->lk.mask = mask; // volatile store
2671
2672 KMP_MB();
2673
2674 //
2675 // Only after the new polling area and mask have been flushed
2676 // to main memory can we update the cleanup ticket field.
2677 //
2678 // volatile load / non-volatile store
2679 //
2680 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2681 }
2682 }
2683}
2684
2685void
2686__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2687{
2688 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2689}
2690
2691static void
2692__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2693{
2694 if ( __kmp_env_consistency_check ) {
2695 char const * const func = "omp_set_lock";
2696 if ( lck->lk.initialized != lck ) {
2697 KMP_FATAL( LockIsUninitialized, func );
2698 }
2699 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2700 KMP_FATAL( LockNestableUsedAsSimple, func );
2701 }
2702 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2703 KMP_FATAL( LockIsAlreadyOwned, func );
2704 }
2705 }
2706
2707 __kmp_acquire_drdpa_lock( lck, gtid );
2708
2709 if ( __kmp_env_consistency_check ) {
2710 lck->lk.owner_id = gtid + 1;
2711 }
2712}
2713
2714int
2715__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2716{
2717 //
2718 // First get a ticket, then read the polls pointer and the mask.
2719 // The polls pointer must be read before the mask!!! (See above)
2720 //
2721 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2722 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2723 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2724 TCR_PTR(lck->lk.polls); // volatile load
2725 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2726 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2727 kmp_uint64 next_ticket = ticket + 1;
2728 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2729 ticket, next_ticket)) {
2730 KMP_FSYNC_ACQUIRED(lck);
2731 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2732 ticket, lck));
2733 lck->lk.now_serving = ticket; // non-volatile store
2734
2735 //
2736            // Since no threads are waiting, there is no possibility that
2737            // we would want to reconfigure the polling area. We might
2738 // have the cleanup ticket value (which says that it is now
2739 // safe to deallocate old_polls), but we'll let a later thread
2740 // which calls __kmp_acquire_lock do that - this routine
2741 // isn't supposed to block, and we would risk blocks if we
2742 // called __kmp_free() to do the deallocation.
2743 //
2744 return TRUE;
2745 }
2746 }
2747 return FALSE;
2748}
2749
2750static int
2751__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2752{
2753 if ( __kmp_env_consistency_check ) {
2754 char const * const func = "omp_test_lock";
2755 if ( lck->lk.initialized != lck ) {
2756 KMP_FATAL( LockIsUninitialized, func );
2757 }
2758 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2759 KMP_FATAL( LockNestableUsedAsSimple, func );
2760 }
2761 }
2762
2763 int retval = __kmp_test_drdpa_lock( lck, gtid );
2764
2765 if ( __kmp_env_consistency_check && retval ) {
2766 lck->lk.owner_id = gtid + 1;
2767 }
2768 return retval;
2769}
2770
2771void
2772__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2773{
2774 //
2775 // Read the ticket value from the lock data struct, then the polls
2776 // pointer and the mask. The polls pointer must be read before the
2777 // mask!!! (See above)
2778 //
2779 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2780 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2781 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2782 TCR_PTR(lck->lk.polls); // volatile load
2783 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2784 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2785 ticket - 1, lck));
2786 KMP_FSYNC_RELEASING(lck);
2787 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2788}
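
// Worked example (sketch): with num_polls == 4 (mask == 3), the owner whose
// now_serving is 5 computes ticket 6 and stores it into polls[6 & 3].poll,
// i.e. polls[2].poll. The waiter that drew ticket 6 is spinning in
// __kmp_acquire_drdpa_lock_timed_template() on polls[2].poll < 6, so that
// single store is the hand-off that releases it.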
2789
2790static void
2791__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2792{
2793 if ( __kmp_env_consistency_check ) {
2794 char const * const func = "omp_unset_lock";
2795 KMP_MB(); /* in case another processor initialized lock */
2796 if ( lck->lk.initialized != lck ) {
2797 KMP_FATAL( LockIsUninitialized, func );
2798 }
2799 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2800 KMP_FATAL( LockNestableUsedAsSimple, func );
2801 }
2802 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2803 KMP_FATAL( LockUnsettingFree, func );
2804 }
2805 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2806 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2807 KMP_FATAL( LockUnsettingSetByAnother, func );
2808 }
2809 lck->lk.owner_id = 0;
2810 }
2811 __kmp_release_drdpa_lock( lck, gtid );
2812}
2813
2814void
2815__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2816{
2817 lck->lk.location = NULL;
2818 lck->lk.mask = 0;
2819 lck->lk.num_polls = 1;
2820 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2821 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2822 lck->lk.cleanup_ticket = 0;
2823 lck->lk.old_polls = NULL;
2824 lck->lk.next_ticket = 0;
2825 lck->lk.now_serving = 0;
2826 lck->lk.owner_id = 0; // no thread owns the lock.
2827 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2828 lck->lk.initialized = lck;
2829
2830 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2831}
2832
2833static void
2834__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2835{
2836 __kmp_init_drdpa_lock( lck );
2837}
2838
2839void
2840__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2841{
2842 lck->lk.initialized = NULL;
2843 lck->lk.location = NULL;
2844 if (lck->lk.polls != NULL) {
2845 __kmp_free((void *)lck->lk.polls);
2846 lck->lk.polls = NULL;
2847 }
2848 if (lck->lk.old_polls != NULL) {
2849 __kmp_free((void *)lck->lk.old_polls);
2850 lck->lk.old_polls = NULL;
2851 }
2852 lck->lk.mask = 0;
2853 lck->lk.num_polls = 0;
2854 lck->lk.cleanup_ticket = 0;
2855 lck->lk.next_ticket = 0;
2856 lck->lk.now_serving = 0;
2857 lck->lk.owner_id = 0;
2858 lck->lk.depth_locked = -1;
2859}
2860
2861static void
2862__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2863{
2864 if ( __kmp_env_consistency_check ) {
2865 char const * const func = "omp_destroy_lock";
2866 if ( lck->lk.initialized != lck ) {
2867 KMP_FATAL( LockIsUninitialized, func );
2868 }
2869 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2870 KMP_FATAL( LockNestableUsedAsSimple, func );
2871 }
2872 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2873 KMP_FATAL( LockStillOwned, func );
2874 }
2875 }
2876 __kmp_destroy_drdpa_lock( lck );
2877}
2878
2879
2880//
2881// nested drdpa ticket locks
2882//
2883
2884void
2885__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2886{
2887 KMP_DEBUG_ASSERT( gtid >= 0 );
2888
2889 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2890 lck->lk.depth_locked += 1;
2891 }
2892 else {
2893 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2894 KMP_MB();
2895 lck->lk.depth_locked = 1;
2896 KMP_MB();
2897 lck->lk.owner_id = gtid + 1;
2898 }
2899}
2900
2901static void
2902__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2903{
2904 if ( __kmp_env_consistency_check ) {
2905 char const * const func = "omp_set_nest_lock";
2906 if ( lck->lk.initialized != lck ) {
2907 KMP_FATAL( LockIsUninitialized, func );
2908 }
2909 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2910 KMP_FATAL( LockSimpleUsedAsNestable, func );
2911 }
2912 }
2913 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2914}
2915
2916int
2917__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2918{
2919 int retval;
2920
2921 KMP_DEBUG_ASSERT( gtid >= 0 );
2922
2923 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2924 retval = ++lck->lk.depth_locked;
2925 }
2926 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2927 retval = 0;
2928 }
2929 else {
2930 KMP_MB();
2931 retval = lck->lk.depth_locked = 1;
2932 KMP_MB();
2933 lck->lk.owner_id = gtid + 1;
2934 }
2935 return retval;
2936}
2937
2938static int
2939__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2940{
2941 if ( __kmp_env_consistency_check ) {
2942 char const * const func = "omp_test_nest_lock";
2943 if ( lck->lk.initialized != lck ) {
2944 KMP_FATAL( LockIsUninitialized, func );
2945 }
2946 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2947 KMP_FATAL( LockSimpleUsedAsNestable, func );
2948 }
2949 }
2950 return __kmp_test_nested_drdpa_lock( lck, gtid );
2951}
2952
2953void
2954__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2955{
2956 KMP_DEBUG_ASSERT( gtid >= 0 );
2957
2958 KMP_MB();
2959 if ( --(lck->lk.depth_locked) == 0 ) {
2960 KMP_MB();
2961 lck->lk.owner_id = 0;
2962 __kmp_release_drdpa_lock( lck, gtid );
2963 }
2964}
2965
2966static void
2967__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2968{
2969 if ( __kmp_env_consistency_check ) {
2970 char const * const func = "omp_unset_nest_lock";
2971 KMP_MB(); /* in case another processor initialized lock */
2972 if ( lck->lk.initialized != lck ) {
2973 KMP_FATAL( LockIsUninitialized, func );
2974 }
2975 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2976 KMP_FATAL( LockSimpleUsedAsNestable, func );
2977 }
2978 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2979 KMP_FATAL( LockUnsettingFree, func );
2980 }
2981 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2982 KMP_FATAL( LockUnsettingSetByAnother, func );
2983 }
2984 }
2985 __kmp_release_nested_drdpa_lock( lck, gtid );
2986}
2987
2988void
2989__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2990{
2991 __kmp_init_drdpa_lock( lck );
2992 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2993}
2994
2995static void
2996__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2997{
2998 __kmp_init_nested_drdpa_lock( lck );
2999}
3000
3001void
3002__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
3003{
3004 __kmp_destroy_drdpa_lock( lck );
3005 lck->lk.depth_locked = 0;
3006}
3007
3008static void
3009__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
3010{
3011 if ( __kmp_env_consistency_check ) {
3012 char const * const func = "omp_destroy_nest_lock";
3013 if ( lck->lk.initialized != lck ) {
3014 KMP_FATAL( LockIsUninitialized, func );
3015 }
3016 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3017 KMP_FATAL( LockSimpleUsedAsNestable, func );
3018 }
3019 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3020 KMP_FATAL( LockStillOwned, func );
3021 }
3022 }
3023 __kmp_destroy_nested_drdpa_lock( lck );
3024}
3025
3026
3027//
3028// access functions to fields which don't exist for all lock kinds.
3029//
3030
3031static int
3032__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3033{
3034 return lck == lck->lk.initialized;
3035}
3036
3037static const ident_t *
3038__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3039{
3040 return lck->lk.location;
3041}
3042
3043static void
3044__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3045{
3046 lck->lk.location = loc;
3047}
3048
3049static kmp_lock_flags_t
3050__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3051{
3052 return lck->lk.flags;
3053}
3054
3055static void
3056__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3057{
3058 lck->lk.flags = flags;
3059}
3060
3061/* ------------------------------------------------------------------------ */
3062/* user locks
3063 *
3064 * They are implemented as a table of function pointers which are set to the
3065 * lock functions of the appropriate kind, once that has been determined.
3066 */
3067
3068enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3069
3070size_t __kmp_base_user_lock_size = 0;
3071size_t __kmp_user_lock_size = 0;
3072
3073kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3074void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3075
3076int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3077void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3078void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3079void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3080void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3081void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3082
3083int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3084void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3085void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3086void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3087
3088int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3089const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3090void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3091kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3092void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3093
3094void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3095{
3096 switch ( user_lock_kind ) {
3097 case lk_default:
3098 default:
3099 KMP_ASSERT( 0 );
3100
3101 case lk_tas: {
3102 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3103 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3104
3105 __kmp_get_user_lock_owner_ =
3106 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3107 ( &__kmp_get_tas_lock_owner );
3108
3109 __kmp_acquire_user_lock_with_checks_ =
3110 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3111 ( &__kmp_acquire_tas_lock_with_checks );
3112
3113 __kmp_test_user_lock_with_checks_ =
3114 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3115 ( &__kmp_test_tas_lock_with_checks );
3116
3117 __kmp_release_user_lock_with_checks_ =
3118 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3119 ( &__kmp_release_tas_lock_with_checks );
3120
3121 __kmp_init_user_lock_with_checks_ =
3122 ( void ( * )( kmp_user_lock_p ) )
3123 ( &__kmp_init_tas_lock_with_checks );
3124
3125 __kmp_destroy_user_lock_ =
3126 ( void ( * )( kmp_user_lock_p ) )
3127 ( &__kmp_destroy_tas_lock );
3128
3129 __kmp_destroy_user_lock_with_checks_ =
3130 ( void ( * )( kmp_user_lock_p ) )
3131 ( &__kmp_destroy_tas_lock_with_checks );
3132
3133 __kmp_acquire_nested_user_lock_with_checks_ =
3134 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3135 ( &__kmp_acquire_nested_tas_lock_with_checks );
3136
3137 __kmp_test_nested_user_lock_with_checks_ =
3138 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3139 ( &__kmp_test_nested_tas_lock_with_checks );
3140
3141 __kmp_release_nested_user_lock_with_checks_ =
3142 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3143 ( &__kmp_release_nested_tas_lock_with_checks );
3144
3145 __kmp_init_nested_user_lock_with_checks_ =
3146 ( void ( * )( kmp_user_lock_p ) )
3147 ( &__kmp_init_nested_tas_lock_with_checks );
3148
3149 __kmp_destroy_nested_user_lock_with_checks_ =
3150 ( void ( * )( kmp_user_lock_p ) )
3151 ( &__kmp_destroy_nested_tas_lock_with_checks );
3152
3153 __kmp_is_user_lock_initialized_ =
3154 ( int ( * )( kmp_user_lock_p ) ) NULL;
3155
3156 __kmp_get_user_lock_location_ =
3157 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3158
3159 __kmp_set_user_lock_location_ =
3160 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3161
3162 __kmp_get_user_lock_flags_ =
3163 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3164
3165 __kmp_set_user_lock_flags_ =
3166 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3167 }
3168 break;
3169
3170#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3171
3172 case lk_futex: {
3173 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3174 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3175
3176 __kmp_get_user_lock_owner_ =
3177 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3178 ( &__kmp_get_futex_lock_owner );
3179
3180 __kmp_acquire_user_lock_with_checks_ =
3181 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3182 ( &__kmp_acquire_futex_lock_with_checks );
3183
3184 __kmp_test_user_lock_with_checks_ =
3185 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3186 ( &__kmp_test_futex_lock_with_checks );
3187
3188 __kmp_release_user_lock_with_checks_ =
3189 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3190 ( &__kmp_release_futex_lock_with_checks );
3191
3192 __kmp_init_user_lock_with_checks_ =
3193 ( void ( * )( kmp_user_lock_p ) )
3194 ( &__kmp_init_futex_lock_with_checks );
3195
3196 __kmp_destroy_user_lock_ =
3197 ( void ( * )( kmp_user_lock_p ) )
3198 ( &__kmp_destroy_futex_lock );
3199
3200 __kmp_destroy_user_lock_with_checks_ =
3201 ( void ( * )( kmp_user_lock_p ) )
3202 ( &__kmp_destroy_futex_lock_with_checks );
3203
3204 __kmp_acquire_nested_user_lock_with_checks_ =
3205 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3206 ( &__kmp_acquire_nested_futex_lock_with_checks );
3207
3208 __kmp_test_nested_user_lock_with_checks_ =
3209 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3210 ( &__kmp_test_nested_futex_lock_with_checks );
3211
3212 __kmp_release_nested_user_lock_with_checks_ =
3213 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3214 ( &__kmp_release_nested_futex_lock_with_checks );
3215
3216 __kmp_init_nested_user_lock_with_checks_ =
3217 ( void ( * )( kmp_user_lock_p ) )
3218 ( &__kmp_init_nested_futex_lock_with_checks );
3219
3220 __kmp_destroy_nested_user_lock_with_checks_ =
3221 ( void ( * )( kmp_user_lock_p ) )
3222 ( &__kmp_destroy_nested_futex_lock_with_checks );
3223
3224 __kmp_is_user_lock_initialized_ =
3225 ( int ( * )( kmp_user_lock_p ) ) NULL;
3226
3227 __kmp_get_user_lock_location_ =
3228 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3229
3230 __kmp_set_user_lock_location_ =
3231 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3232
3233 __kmp_get_user_lock_flags_ =
3234 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3235
3236 __kmp_set_user_lock_flags_ =
3237 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3238 }
3239 break;
3240
3241#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3242
3243 case lk_ticket: {
3244 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3245 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3246
3247 __kmp_get_user_lock_owner_ =
3248 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3249 ( &__kmp_get_ticket_lock_owner );
3250
3251 __kmp_acquire_user_lock_with_checks_ =
3252 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3253 ( &__kmp_acquire_ticket_lock_with_checks );
3254
3255 __kmp_test_user_lock_with_checks_ =
3256 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3257 ( &__kmp_test_ticket_lock_with_checks );
3258
3259 __kmp_release_user_lock_with_checks_ =
3260 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3261 ( &__kmp_release_ticket_lock_with_checks );
3262
3263 __kmp_init_user_lock_with_checks_ =
3264 ( void ( * )( kmp_user_lock_p ) )
3265 ( &__kmp_init_ticket_lock_with_checks );
3266
3267 __kmp_destroy_user_lock_ =
3268 ( void ( * )( kmp_user_lock_p ) )
3269 ( &__kmp_destroy_ticket_lock );
3270
3271 __kmp_destroy_user_lock_with_checks_ =
3272 ( void ( * )( kmp_user_lock_p ) )
3273 ( &__kmp_destroy_ticket_lock_with_checks );
3274
3275 __kmp_acquire_nested_user_lock_with_checks_ =
3276 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3277 ( &__kmp_acquire_nested_ticket_lock_with_checks );
3278
3279 __kmp_test_nested_user_lock_with_checks_ =
3280 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3281 ( &__kmp_test_nested_ticket_lock_with_checks );
3282
3283 __kmp_release_nested_user_lock_with_checks_ =
3284 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3285 ( &__kmp_release_nested_ticket_lock_with_checks );
3286
3287 __kmp_init_nested_user_lock_with_checks_ =
3288 ( void ( * )( kmp_user_lock_p ) )
3289 ( &__kmp_init_nested_ticket_lock_with_checks );
3290
3291 __kmp_destroy_nested_user_lock_with_checks_ =
3292 ( void ( * )( kmp_user_lock_p ) )
3293 ( &__kmp_destroy_nested_ticket_lock_with_checks );
3294
3295 __kmp_is_user_lock_initialized_ =
3296 ( int ( * )( kmp_user_lock_p ) )
3297 ( &__kmp_is_ticket_lock_initialized );
3298
3299 __kmp_get_user_lock_location_ =
3300 ( const ident_t * ( * )( kmp_user_lock_p ) )
3301 ( &__kmp_get_ticket_lock_location );
3302
3303 __kmp_set_user_lock_location_ =
3304 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3305 ( &__kmp_set_ticket_lock_location );
3306
3307 __kmp_get_user_lock_flags_ =
3308 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3309 ( &__kmp_get_ticket_lock_flags );
3310
3311 __kmp_set_user_lock_flags_ =
3312 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3313 ( &__kmp_set_ticket_lock_flags );
3314 }
3315 break;
3316
3317 case lk_queuing: {
3318 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3319 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3320
3321 __kmp_get_user_lock_owner_ =
3322 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3323 ( &__kmp_get_queuing_lock_owner );
3324
3325 __kmp_acquire_user_lock_with_checks_ =
3326 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3327 ( &__kmp_acquire_queuing_lock_with_checks );
3328
3329 __kmp_test_user_lock_with_checks_ =
3330 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3331 ( &__kmp_test_queuing_lock_with_checks );
3332
3333 __kmp_release_user_lock_with_checks_ =
3334 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3335 ( &__kmp_release_queuing_lock_with_checks );
3336
3337 __kmp_init_user_lock_with_checks_ =
3338 ( void ( * )( kmp_user_lock_p ) )
3339 ( &__kmp_init_queuing_lock_with_checks );
3340
3341 __kmp_destroy_user_lock_ =
3342 ( void ( * )( kmp_user_lock_p ) )
3343 ( &__kmp_destroy_queuing_lock );
3344
3345 __kmp_destroy_user_lock_with_checks_ =
3346 ( void ( * )( kmp_user_lock_p ) )
3347 ( &__kmp_destroy_queuing_lock_with_checks );
3348
3349 __kmp_acquire_nested_user_lock_with_checks_ =
3350 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3351 ( &__kmp_acquire_nested_queuing_lock_with_checks );
3352
3353 __kmp_test_nested_user_lock_with_checks_ =
3354 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3355 ( &__kmp_test_nested_queuing_lock_with_checks );
3356
3357 __kmp_release_nested_user_lock_with_checks_ =
3358 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3359 ( &__kmp_release_nested_queuing_lock_with_checks );
3360
3361 __kmp_init_nested_user_lock_with_checks_ =
3362 ( void ( * )( kmp_user_lock_p ) )
3363 ( &__kmp_init_nested_queuing_lock_with_checks );
3364
3365 __kmp_destroy_nested_user_lock_with_checks_ =
3366 ( void ( * )( kmp_user_lock_p ) )
3367 ( &__kmp_destroy_nested_queuing_lock_with_checks );
3368
3369 __kmp_is_user_lock_initialized_ =
3370 ( int ( * )( kmp_user_lock_p ) )
3371 ( &__kmp_is_queuing_lock_initialized );
3372
3373 __kmp_get_user_lock_location_ =
3374 ( const ident_t * ( * )( kmp_user_lock_p ) )
3375 ( &__kmp_get_queuing_lock_location );
3376
3377 __kmp_set_user_lock_location_ =
3378 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3379 ( &__kmp_set_queuing_lock_location );
3380
3381 __kmp_get_user_lock_flags_ =
3382 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3383 ( &__kmp_get_queuing_lock_flags );
3384
3385 __kmp_set_user_lock_flags_ =
3386 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3387 ( &__kmp_set_queuing_lock_flags );
3388 }
3389 break;
3390
3391#if KMP_USE_ADAPTIVE_LOCKS
3392 case lk_adaptive: {
3393 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3394 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3395
3396 __kmp_get_user_lock_owner_ =
3397 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3398 ( &__kmp_get_queuing_lock_owner );
3399
3400 __kmp_acquire_user_lock_with_checks_ =
3401 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3402 ( &__kmp_acquire_adaptive_lock_with_checks );
3403
3404 __kmp_test_user_lock_with_checks_ =
3405 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3406 ( &__kmp_test_adaptive_lock_with_checks );
3407
3408 __kmp_release_user_lock_with_checks_ =
3409 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3410 ( &__kmp_release_adaptive_lock_with_checks );
3411
3412 __kmp_init_user_lock_with_checks_ =
3413 ( void ( * )( kmp_user_lock_p ) )
3414 ( &__kmp_init_adaptive_lock_with_checks );
3415
3416 __kmp_destroy_user_lock_with_checks_ =
3417 ( void ( * )( kmp_user_lock_p ) )
3418 ( &__kmp_destroy_adaptive_lock_with_checks );
3419
3420 __kmp_destroy_user_lock_ =
3421 ( void ( * )( kmp_user_lock_p ) )
3422 ( &__kmp_destroy_adaptive_lock );
3423
3424 __kmp_is_user_lock_initialized_ =
3425 ( int ( * )( kmp_user_lock_p ) )
3426 ( &__kmp_is_queuing_lock_initialized );
3427
3428 __kmp_get_user_lock_location_ =
3429 ( const ident_t * ( * )( kmp_user_lock_p ) )
3430 ( &__kmp_get_queuing_lock_location );
3431
3432 __kmp_set_user_lock_location_ =
3433 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3434 ( &__kmp_set_queuing_lock_location );
3435
3436 __kmp_get_user_lock_flags_ =
3437 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3438 ( &__kmp_get_queuing_lock_flags );
3439
3440 __kmp_set_user_lock_flags_ =
3441 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3442 ( &__kmp_set_queuing_lock_flags );
3443
3444 }
3445 break;
3446#endif // KMP_USE_ADAPTIVE_LOCKS
3447
3448 case lk_drdpa: {
3449 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3450 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3451
3452 __kmp_get_user_lock_owner_ =
3453 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3454 ( &__kmp_get_drdpa_lock_owner );
3455
3456 __kmp_acquire_user_lock_with_checks_ =
3457 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3458 ( &__kmp_acquire_drdpa_lock_with_checks );
3459
3460 __kmp_test_user_lock_with_checks_ =
3461 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3462 ( &__kmp_test_drdpa_lock_with_checks );
3463
3464 __kmp_release_user_lock_with_checks_ =
3465 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3466 ( &__kmp_release_drdpa_lock_with_checks );
3467
3468 __kmp_init_user_lock_with_checks_ =
3469 ( void ( * )( kmp_user_lock_p ) )
3470 ( &__kmp_init_drdpa_lock_with_checks );
3471
3472 __kmp_destroy_user_lock_ =
3473 ( void ( * )( kmp_user_lock_p ) )
3474 ( &__kmp_destroy_drdpa_lock );
3475
3476 __kmp_destroy_user_lock_with_checks_ =
3477 ( void ( * )( kmp_user_lock_p ) )
3478 ( &__kmp_destroy_drdpa_lock_with_checks );
3479
3480 __kmp_acquire_nested_user_lock_with_checks_ =
3481 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3482 ( &__kmp_acquire_nested_drdpa_lock_with_checks );
3483
3484 __kmp_test_nested_user_lock_with_checks_ =
3485 ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3486 ( &__kmp_test_nested_drdpa_lock_with_checks );
3487
3488 __kmp_release_nested_user_lock_with_checks_ =
3489 ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3490 ( &__kmp_release_nested_drdpa_lock_with_checks );
3491
3492 __kmp_init_nested_user_lock_with_checks_ =
3493 ( void ( * )( kmp_user_lock_p ) )
3494 ( &__kmp_init_nested_drdpa_lock_with_checks );
3495
3496 __kmp_destroy_nested_user_lock_with_checks_ =
3497 ( void ( * )( kmp_user_lock_p ) )
3498 ( &__kmp_destroy_nested_drdpa_lock_with_checks );
3499
3500 __kmp_is_user_lock_initialized_ =
3501 ( int ( * )( kmp_user_lock_p ) )
3502 ( &__kmp_is_drdpa_lock_initialized );
3503
3504 __kmp_get_user_lock_location_ =
3505 ( const ident_t * ( * )( kmp_user_lock_p ) )
3506 ( &__kmp_get_drdpa_lock_location );
3507
3508 __kmp_set_user_lock_location_ =
3509 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3510 ( &__kmp_set_drdpa_lock_location );
3511
3512 __kmp_get_user_lock_flags_ =
3513 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3514 ( &__kmp_get_drdpa_lock_flags );
3515
3516 __kmp_set_user_lock_flags_ =
3517 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3518 ( &__kmp_set_drdpa_lock_flags );
3519 }
3520 break;
3521 }
3522}
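
// Illustrative sketch only -- the helper below is hypothetical and not part
// of the runtime. Once __kmp_set_user_lock_vptrs() has selected a lock kind,
// generic call sites dispatch through the function pointers without knowing
// which concrete lock type is behind them:
//
//     static __inline void
//     __example_acquire_user_lock( kmp_user_lock_p lck, kmp_int32 gtid )
//     {
//         KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
//         ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
//     }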
3523
3524
3525// ----------------------------------------------------------------------------
3526// User lock table & lock allocation
3527
3528kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3529kmp_user_lock_p __kmp_lock_pool = NULL;
3530
3531// Lock block-allocation support.
3532kmp_block_of_locks* __kmp_lock_blocks = NULL;
3533int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3534
3535static kmp_lock_index_t
3536__kmp_lock_table_insert( kmp_user_lock_p lck )
3537{
3538 // Assume that kmp_global_lock is held upon entry/exit.
3539 kmp_lock_index_t index;
3540 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3541 kmp_lock_index_t size;
3542 kmp_user_lock_p *table;
3543 kmp_lock_index_t i;
3544 // Reallocate lock table.
3545 if ( __kmp_user_lock_table.allocated == 0 ) {
3546 size = 1024;
3547 }
3548 else {
3549 size = __kmp_user_lock_table.allocated * 2;
3550 }
3551 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3552 memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3553 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3554            // We cannot free the previous table now, since it may be in use by other
3555            // threads. So save the pointer to the previous table in the first element of the
3556            // new table. All the tables are organized into a list, and can be freed when
3557            // the library is shutting down.
3558 __kmp_user_lock_table.table = table;
3559 __kmp_user_lock_table.allocated = size;
3560 }
3561 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3562 index = __kmp_user_lock_table.used;
3563 __kmp_user_lock_table.table[ index ] = lck;
3564 ++ __kmp_user_lock_table.used;
3565 return index;
3566}
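
// Illustrative sketch only (hypothetical cleanup loop): the retired tables
// form a singly linked chain through element [ 0 ], so at shutdown they
// could be walked and freed roughly like this, assuming no thread is still
// reading an old table:
//
//     kmp_user_lock_p *table = (kmp_user_lock_p *)__kmp_user_lock_table.table[ 0 ];
//     while ( table != NULL ) {
//         kmp_user_lock_p *next = (kmp_user_lock_p *)table[ 0 ];
//         __kmp_free( table );
//         table = next;
//     }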
3567
3568static kmp_user_lock_p
3569__kmp_lock_block_allocate()
3570{
3571 // Assume that kmp_global_lock is held upon entry/exit.
3572 static int last_index = 0;
3573 if ( ( last_index >= __kmp_num_locks_in_block )
3574 || ( __kmp_lock_blocks == NULL ) ) {
3575 // Restart the index.
3576 last_index = 0;
3577 // Need to allocate a new block.
3578 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3579 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3580 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3581 // Set up the new block.
3582 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3583 new_block->next_block = __kmp_lock_blocks;
3584 new_block->locks = (void *)buffer;
3585 // Publish the new block.
3586 KMP_MB();
3587 __kmp_lock_blocks = new_block;
3588 }
3589 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3590 [ last_index * __kmp_user_lock_size ] ) );
3591 last_index++;
3592 return ret;
3593}
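
// Layout sketch: each block is a single allocation holding
// __kmp_user_lock_size * __kmp_num_locks_in_block bytes of lock storage,
// immediately followed by its kmp_block_of_locks header, so lock i of the
// newest block lives at
//     ((char *)__kmp_lock_blocks->locks) + i * __kmp_user_lock_size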
3594
3595//
3596// Get memory for a lock. It may be freshly allocated memory or reused memory
3597// from lock pool.
3598//
3599kmp_user_lock_p
3600__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3601 kmp_lock_flags_t flags )
3602{
3603 kmp_user_lock_p lck;
3604 kmp_lock_index_t index;
3605 KMP_DEBUG_ASSERT( user_lock );
3606
3607 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3608
3609 if ( __kmp_lock_pool == NULL ) {
3610 // Lock pool is empty. Allocate new memory.
3611 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3612 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3613 }
3614 else {
3615 lck = __kmp_lock_block_allocate();
3616 }
3617
3618 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3619        // and the debugger has info on all allocated locks.
3620 index = __kmp_lock_table_insert( lck );
3621 }
3622 else {
3623 // Pick up lock from pool.
3624 lck = __kmp_lock_pool;
3625 index = __kmp_lock_pool->pool.index;
3626 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3627 }
3628
3629 //
3630 // We could potentially differentiate between nested and regular locks
3631 // here, and do the lock table lookup for regular locks only.
3632 //
3633 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3634 * ( (kmp_lock_index_t *) user_lock ) = index;
3635 }
3636 else {
3637 * ( (kmp_user_lock_p *) user_lock ) = lck;
3638 }
3639
3640 // mark the lock if it is critical section lock.
3641 __kmp_set_user_lock_flags( lck, flags );
3642
3643    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3644
3645 return lck;
3646}
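
// Illustrative sketch only (hypothetical call site): a typical allocate /
// lookup pair. For a 4-byte omp_lock_t the user's variable stores the table
// index rather than the pointer, and __kmp_lookup_user_lock() undoes that
// encoding:
//
//     kmp_user_lock_p lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
//     ...
//     lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );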
3647
3648// Put lock's memory to pool for reusing.
3649void
3650__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3651{
3652 kmp_lock_pool_t * lock_pool;
3653
3654 KMP_DEBUG_ASSERT( user_lock != NULL );
3655 KMP_DEBUG_ASSERT( lck != NULL );
3656
3657 __kmp_acquire_lock( & __kmp_global_lock, gtid );
3658
3659 lck->pool.next = __kmp_lock_pool;
3660 __kmp_lock_pool = lck;
3661 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3662 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3663 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3664 lck->pool.index = index;
3665 }
3666
3667 __kmp_release_lock( & __kmp_global_lock, gtid );
3668}
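
//
// Note: the pool is a LIFO free list threaded through lck->pool.next --
// __kmp_user_lock_free() pushes onto it above and __kmp_user_lock_allocate()
// pops from it, so the most recently freed lock is the first to be reused.
//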
3669
3670kmp_user_lock_p
3671__kmp_lookup_user_lock( void **user_lock, char const *func )
3672{
3673 kmp_user_lock_p lck = NULL;
3674
3675 if ( __kmp_env_consistency_check ) {
3676 if ( user_lock == NULL ) {
3677 KMP_FATAL( LockIsUninitialized, func );
3678 }
3679 }
3680
3681 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3682 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3683 if ( __kmp_env_consistency_check ) {
3684 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3685 KMP_FATAL( LockIsUninitialized, func );
3686 }
3687 }
3688 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3689 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3690 lck = __kmp_user_lock_table.table[index];
3691 }
3692 else {
3693 lck = *( (kmp_user_lock_p *)user_lock );
3694 }
3695
3696 if ( __kmp_env_consistency_check ) {
3697 if ( lck == NULL ) {
3698 KMP_FATAL( LockIsUninitialized, func );
3699 }
3700 }
3701
3702 return lck;
3703}
3704
3705void
3706__kmp_cleanup_user_locks( void )
3707{
3708 //
3709    // Reset the lock pool. Do not worry about the locks in the pool -- we will
3710    // free them when iterating through the lock table (it includes all the
3711    // locks, dead or alive).
3712 //
3713 __kmp_lock_pool = NULL;
3714
3715#define IS_CRITICAL(lck) \
3716 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3717 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3718
3719 //
3720 // Loop through lock table, free all locks.
3721 //
3722 // Do not free item [0], it is reserved for lock tables list.
3723 //
3724 // FIXME - we are iterating through a list of (pointers to) objects of
3725 // type union kmp_user_lock, but we have no way of knowing whether the
3726 // base type is currently "pool" or whatever the global user lock type
3727 // is.
3728 //
3729 // We are relying on the fact that for all of the user lock types
3730 // (except "tas"), the first field in the lock struct is the "initialized"
3731 // field, which is set to the address of the lock object itself when
3732 // the lock is initialized. When the union is of type "pool", the
3733 // first field is a pointer to the next object in the free list, which
3734 // will not be the same address as the object itself.
3735 //
3736 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3737 // will fail for "pool" objects on the free list. This must happen as
3738 // the "location" field of real user locks overlaps the "index" field
3739 // of "pool" objects.
3740 //
3741 // It would be better to run through the free list, and remove all "pool"
3742 // objects from the lock table before executing this loop. However,
3743 // "pool" objects do not always have their index field set (only on
3744 // lin_32e), and I don't want to search the lock table for the address
3745 // of every "pool" object on the free list.
3746 //
3747 while ( __kmp_user_lock_table.used > 1 ) {
3748 const ident *loc;
3749
3750 //
3751        // Reduce __kmp_user_lock_table.used before freeing the lock,
3752        // so that the state of the lock table stays consistent.
3753 //
3754 kmp_user_lock_p lck = __kmp_user_lock_table.table[
3755 --__kmp_user_lock_table.used ];
3756
3757 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3758 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3759 //
3760            // Issue a warning if: KMP_CONSISTENCY_CHECK is on AND the lock is
3761            // initialized AND it is NOT a critical section lock (the user is not
3762            // responsible for destroying criticals) AND we know the source
3763            // location to report.
3764 //
3765 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3766 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3767 ( loc->psource != NULL ) ) {
3768 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3769 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3770 str_loc.line, str_loc.col );
3771 __kmp_str_loc_free( &str_loc);
3772 }
3773
3774#ifdef KMP_DEBUG
3775 if ( IS_CRITICAL( lck ) ) {
3776 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3777 }
3778 else {
3779 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3780 }
3781#endif // KMP_DEBUG
3782
3783 //
3784            // Clean up the lock's internal dynamic resources
3785            // (particularly for drdpa locks).
3786 //
3787 __kmp_destroy_user_lock( lck );
3788 }
3789
3790 //
3791 // Free the lock if block allocation of locks is not used.
3792 //
3793 if ( __kmp_lock_blocks == NULL ) {
3794 __kmp_free( lck );
3795 }
3796 }
3797
3798#undef IS_CRITICAL
3799
3800 //
3801 // delete lock table(s).
3802    // Delete the lock table(s).
3803 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3804 __kmp_user_lock_table.table = NULL;
3805 __kmp_user_lock_table.allocated = 0;
3806
3807 while ( table_ptr != NULL ) {
3808 //
3809 // In the first element we saved the pointer to the previous
3810 // (smaller) lock table.
3811 //
3812 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3813 __kmp_free( table_ptr );
3814 table_ptr = next;
3815 }
3816
3817 //
3818 // Free buffers allocated for blocks of locks.
3819 //
3820 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3821 __kmp_lock_blocks = NULL;
3822
3823 while ( block_ptr != NULL ) {
3824 kmp_block_of_locks_t *next = block_ptr->next_block;
3825 __kmp_free( block_ptr->locks );
3826 //
3827 // *block_ptr itself was allocated at the end of the locks vector.
3828 //
3829 block_ptr = next;
3830 }
3831
3832 TCW_4(__kmp_init_user_locks, FALSE);
3833}
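
//
// Shutdown order, for reference: __kmp_cleanup_user_locks() drops the free
// pool, walks the lock table in reverse destroying (and, when block
// allocation is not used, freeing) each live lock, releases the chain of
// lock tables through their [ 0 ] links, and finally frees each block of
// locks; since each kmp_block_of_locks header lives at the end of its own
// locks buffer, freeing block_ptr->locks releases the header as well.
//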
3834