1/*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include <stddef.h>
17
18#include "kmp.h"
19#include "kmp_itt.h"
20#include "kmp_i18n.h"
21#include "kmp_lock.h"
22#include "kmp_io.h"
23
24#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25# include <unistd.h>
26# include <sys/syscall.h>
27// We should really include <futex.h>, but that causes compatibility problems on different
28// Linux* OS distributions that either require that you include (or break when you try to include)
29// <pci/types.h>.
30// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
31// we just define the constants here and don't include <futex.h>
32# ifndef FUTEX_WAIT
33# define FUTEX_WAIT 0
34# endif
35# ifndef FUTEX_WAKE
36# define FUTEX_WAKE 1
37# endif
38#endif
39
40/* Implement spin locks for internal library use.             */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
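// A minimal sketch (illustrative only, kept out of the build) of the
// ticket/bakery idea the ticket locks below are based on: atomically take a
// ticket, then wait until "now serving" reaches it. The toy_* names are made
// up here and GCC's __sync_fetch_and_add is assumed; the real code adds
// yielding, consistency checks, and nesting support, and relies on the
// unsigned wraparound behavior verified by __kmp_validate_locks() below.
#if 0
typedef struct {
    volatile kmp_uint32 next_ticket;   // next ticket to hand out
    volatile kmp_uint32 now_serving;   // ticket currently allowed to proceed
} toy_ticket_lock_t;

static void
toy_ticket_acquire( toy_ticket_lock_t *l )
{
    kmp_uint32 my_ticket = __sync_fetch_and_add( &l->next_ticket, 1 );
    while ( l->now_serving != my_ticket )
        ;                              // the real code yields/backs off here
}

static void
toy_ticket_release( toy_ticket_lock_t *l )
{
    ++l->now_serving;                  // unsigned wraparound is benign
}
#endif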
42
43void
44__kmp_validate_locks( void )
45{
46 int i;
47 kmp_uint32 x, y;
48
49    /* Check to make sure unsigned arithmetic wraps properly */
50 x = ~((kmp_uint32) 0) - 2;
51 y = x - 2;
52
53 for (i = 0; i < 8; ++i, ++x, ++y) {
54 kmp_uint32 z = (x - y);
55 KMP_ASSERT( z == 2 );
56 }
57
58 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59}
60
61
62/* ------------------------------------------------------------------------ */
63/* test and set locks */
64
65//
66// For the non-nested locks, we can only assume that the first 4 bytes were
67// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68// compiler only allocates a 4 byte pointer on IA-32 architecture. On
69// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70//
71// gcc reserves >= 8 bytes for nested locks, so we can assume that the
72// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73//
74
75static kmp_int32
76__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77{
78    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79}
80
81static inline bool
82__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83{
84 return lck->lk.depth_locked != -1;
85}
86
87__forceinline static void
88__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89{
90 KMP_MB();
91
92#ifdef USE_LOCK_PROFILE
93 kmp_uint32 curr = TCR_4( lck->lk.poll );
94 if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96 /* else __kmp_printf( "." );*/
97#endif /* USE_LOCK_PROFILE */
98
99    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
100 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
101        KMP_FSYNC_ACQUIRED(lck);
102 return;
103 }
104
105 kmp_uint32 spins;
106 KMP_FSYNC_PREPARE( lck );
107 KMP_INIT_YIELD( spins );
108 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109 __kmp_xproc ) ) {
110 KMP_YIELD( TRUE );
111 }
112 else {
113 KMP_YIELD_SPIN( spins );
114 }
115
116    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
117 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
118        //
119 // FIXME - use exponential backoff here
120 //
121 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122 __kmp_xproc ) ) {
123 KMP_YIELD( TRUE );
124 }
125 else {
126 KMP_YIELD_SPIN( spins );
127 }
128 }
129 KMP_FSYNC_ACQUIRED( lck );
130}
131
132void
133__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
134{
135 __kmp_acquire_tas_lock_timed_template( lck, gtid );
136}
137
138static void
139__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
140{
141    char const * const func = "omp_set_lock";
142 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
143 && __kmp_is_tas_lock_nestable( lck ) ) {
144 KMP_FATAL( LockNestableUsedAsSimple, func );
145 }
146 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
147 KMP_FATAL( LockIsAlreadyOwned, func );
148    }
149 __kmp_acquire_tas_lock( lck, gtid );
150}
151
152int
153__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
154{
155    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
156 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
157        KMP_FSYNC_ACQUIRED( lck );
158 return TRUE;
159 }
160 return FALSE;
161}
162
163static int
164__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
165{
166    char const * const func = "omp_test_lock";
167 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
168 && __kmp_is_tas_lock_nestable( lck ) ) {
169 KMP_FATAL( LockNestableUsedAsSimple, func );
170    }
171 return __kmp_test_tas_lock( lck, gtid );
172}
173
174void
175__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
176{
177 KMP_MB(); /* Flush all pending memory write invalidates. */
178
179 KMP_FSYNC_RELEASING(lck);
180    KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
181    KMP_MB();       /* Flush all pending memory write invalidates. */
182
183 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
184 __kmp_xproc ) );
185}
186
187static void
188__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
189{
190    char const * const func = "omp_unset_lock";
191 KMP_MB(); /* in case another processor initialized lock */
192 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
193 && __kmp_is_tas_lock_nestable( lck ) ) {
194 KMP_FATAL( LockNestableUsedAsSimple, func );
195 }
196 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
197 KMP_FATAL( LockUnsettingFree, func );
198 }
199 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
200 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
201 KMP_FATAL( LockUnsettingSetByAnother, func );
202    }
203 __kmp_release_tas_lock( lck, gtid );
204}
205
206void
207__kmp_init_tas_lock( kmp_tas_lock_t * lck )
208{
209    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
210}
211
212static void
213__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
214{
215 __kmp_init_tas_lock( lck );
216}
217
218void
219__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
220{
221 lck->lk.poll = 0;
222}
223
224static void
225__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
226{
227    char const * const func = "omp_destroy_lock";
228 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
229 && __kmp_is_tas_lock_nestable( lck ) ) {
230 KMP_FATAL( LockNestableUsedAsSimple, func );
231 }
232 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
233 KMP_FATAL( LockStillOwned, func );
234    }
235 __kmp_destroy_tas_lock( lck );
236}
237
238
239//
240// nested test and set locks
241//
242
243void
244__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
245{
246 KMP_DEBUG_ASSERT( gtid >= 0 );
247
248 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
249 lck->lk.depth_locked += 1;
250 }
251 else {
252 __kmp_acquire_tas_lock_timed_template( lck, gtid );
253 lck->lk.depth_locked = 1;
254 }
255}
256
257static void
258__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
259{
260    char const * const func = "omp_set_nest_lock";
261 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
262 KMP_FATAL( LockSimpleUsedAsNestable, func );
263    }
264 __kmp_acquire_nested_tas_lock( lck, gtid );
265}
266
267int
268__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
269{
270 int retval;
271
272 KMP_DEBUG_ASSERT( gtid >= 0 );
273
274 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
275 retval = ++lck->lk.depth_locked;
276 }
277 else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
278 retval = 0;
279 }
280 else {
281 KMP_MB();
282 retval = lck->lk.depth_locked = 1;
283 }
284 return retval;
285}
286
287static int
288__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
289{
290    char const * const func = "omp_test_nest_lock";
291 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
292 KMP_FATAL( LockSimpleUsedAsNestable, func );
293    }
294 return __kmp_test_nested_tas_lock( lck, gtid );
295}
296
297void
298__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
299{
300 KMP_DEBUG_ASSERT( gtid >= 0 );
301
302 KMP_MB();
303 if ( --(lck->lk.depth_locked) == 0 ) {
304 __kmp_release_tas_lock( lck, gtid );
305 }
306}
307
308static void
309__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
310{
311    char const * const func = "omp_unset_nest_lock";
312 KMP_MB(); /* in case another processor initialized lock */
313 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
314 KMP_FATAL( LockSimpleUsedAsNestable, func );
315 }
316 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
317 KMP_FATAL( LockUnsettingFree, func );
318 }
319 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
320 KMP_FATAL( LockUnsettingSetByAnother, func );
321    }
322 __kmp_release_nested_tas_lock( lck, gtid );
323}
324
325void
326__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
327{
328 __kmp_init_tas_lock( lck );
329 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
330}
331
332static void
333__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
334{
335 __kmp_init_nested_tas_lock( lck );
336}
337
338void
339__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
340{
341 __kmp_destroy_tas_lock( lck );
342 lck->lk.depth_locked = 0;
343}
344
345static void
346__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
347{
348    char const * const func = "omp_destroy_nest_lock";
349 if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
350 KMP_FATAL( LockSimpleUsedAsNestable, func );
351 }
352 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
353 KMP_FATAL( LockStillOwned, func );
354    }
355 __kmp_destroy_nested_tas_lock( lck );
356}
357
358
359#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
360
361/* ------------------------------------------------------------------------ */
362/* futex locks */
363
364// futex locks are really just test and set locks, with a different method
365// of handling contention. They take the same amount of space as test and
366// set locks, and are allocated the same way (i.e. use the area allocated by
367// the compiler for non-nested locks / allocate nested locks on the heap).
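//
// Illustrative sketch only (kept out of the build): the bare futex idiom the
// code below builds on. A contended acquirer publishes a "waiter present"
// value and asks the kernel to sleep until the word changes; the releaser
// stores the free value and wakes one sleeper. The toy_* helper names are
// made up here; the real lock also encodes gtid+1 and a wake-me bit in the
// same word and handles syscall failures.
#if 0
static void
toy_futex_wait( volatile kmp_int32 *addr, kmp_int32 expected )
{
    // Sleeps only if *addr still holds 'expected' when the kernel re-checks it,
    // which closes the race between the last poll and going to sleep.
    syscall( __NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0 );
}

static void
toy_futex_wake_one( volatile kmp_int32 *addr )
{
    syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
}
#endif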
368
369static kmp_int32
370__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
371{
372    return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
373}
374
375static inline bool
376__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
377{
378 return lck->lk.depth_locked != -1;
379}
380
381__forceinline static void
382__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
383{
384 kmp_int32 gtid_code = ( gtid + 1 ) << 1;
385
386 KMP_MB();
387
388#ifdef USE_LOCK_PROFILE
389 kmp_uint32 curr = TCR_4( lck->lk.poll );
390 if ( ( curr != 0 ) && ( curr != gtid_code ) )
391 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
392 /* else __kmp_printf( "." );*/
393#endif /* USE_LOCK_PROFILE */
394
395 KMP_FSYNC_PREPARE( lck );
396 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
397 lck, lck->lk.poll, gtid ) );
398
399 kmp_int32 poll_val;
400
401 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
402 DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
403
404 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
405        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
406 lck, gtid, poll_val, cond ) );
407
408 //
409 // NOTE: if you try to use the following condition for this branch
410 //
411 // if ( poll_val & 1 == 0 )
412 //
413 // Then the 12.0 compiler has a bug where the following block will
414 // always be skipped, regardless of the value of the LSB of poll_val.
415 //
416 if ( ! cond ) {
417 //
418 // Try to set the lsb in the poll to indicate to the owner
419 // thread that they need to wake this thread up.
420 //
421            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
422                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
423 lck, lck->lk.poll, gtid ) );
424 continue;
425 }
426            poll_val |= DYNA_LOCK_BUSY(1, futex);
427
428 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
429 lck, lck->lk.poll, gtid ) );
430 }
431
432 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
433 lck, gtid, poll_val ) );
434
435 kmp_int32 rc;
436 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
437 poll_val, NULL, NULL, 0 ) ) != 0 ) {
438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
439 lck, gtid, poll_val, rc, errno ) );
440 continue;
441 }
442
443 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
444 lck, gtid, poll_val ) );
445 //
446        // This thread has now done a successful futex wait call and has been
447        // entered into the OS futex queue.  We must now perform a futex
448 // wake call when releasing the lock, as we have no idea how many
449 // other threads are in the queue.
450 //
451 gtid_code |= 1;
452 }
453
454 KMP_FSYNC_ACQUIRED( lck );
455 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
456 lck, lck->lk.poll, gtid ) );
457}
458
459void
460__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
461{
462 __kmp_acquire_futex_lock_timed_template( lck, gtid );
463}
464
465static void
466__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
467{
468    char const * const func = "omp_set_lock";
469 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
470 && __kmp_is_futex_lock_nestable( lck ) ) {
471 KMP_FATAL( LockNestableUsedAsSimple, func );
472 }
473 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
474 KMP_FATAL( LockIsAlreadyOwned, func );
475    }
476 __kmp_acquire_futex_lock( lck, gtid );
477}
478
479int
480__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
481{
482    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
483        KMP_FSYNC_ACQUIRED( lck );
484 return TRUE;
485 }
486 return FALSE;
487}
488
489static int
490__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
491{
492    char const * const func = "omp_test_lock";
493 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
494 && __kmp_is_futex_lock_nestable( lck ) ) {
495 KMP_FATAL( LockNestableUsedAsSimple, func );
496    }
497 return __kmp_test_futex_lock( lck, gtid );
498}
499
500void
501__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502{
503 KMP_MB(); /* Flush all pending memory write invalidates. */
504
505 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
506 lck, lck->lk.poll, gtid ) );
507
508 KMP_FSYNC_RELEASING(lck);
509
510    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
511
512 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
513 lck, gtid, poll_val ) );
514
515    if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
516        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
517 lck, gtid ) );
518        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
519    }
520
521 KMP_MB(); /* Flush all pending memory write invalidates. */
522
523 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
524 lck, lck->lk.poll, gtid ) );
525
526 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
527 __kmp_xproc ) );
528}
529
530static void
531__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
532{
533    char const * const func = "omp_unset_lock";
534 KMP_MB(); /* in case another processor initialized lock */
535 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
536 && __kmp_is_futex_lock_nestable( lck ) ) {
537 KMP_FATAL( LockNestableUsedAsSimple, func );
538 }
539 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
540 KMP_FATAL( LockUnsettingFree, func );
541 }
542 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
543 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
544 KMP_FATAL( LockUnsettingSetByAnother, func );
545    }
546 __kmp_release_futex_lock( lck, gtid );
547}
548
549void
550__kmp_init_futex_lock( kmp_futex_lock_t * lck )
551{
552    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
553}
554
555static void
556__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
557{
558 __kmp_init_futex_lock( lck );
559}
560
561void
562__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
563{
564 lck->lk.poll = 0;
565}
566
567static void
568__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
569{
570    char const * const func = "omp_destroy_lock";
571 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
572 && __kmp_is_futex_lock_nestable( lck ) ) {
573 KMP_FATAL( LockNestableUsedAsSimple, func );
574 }
575 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
576 KMP_FATAL( LockStillOwned, func );
577    }
578 __kmp_destroy_futex_lock( lck );
579}
580
581
582//
583// nested futex locks
584//
585
586void
587__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
588{
589 KMP_DEBUG_ASSERT( gtid >= 0 );
590
591 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
592 lck->lk.depth_locked += 1;
593 }
594 else {
595 __kmp_acquire_futex_lock_timed_template( lck, gtid );
596 lck->lk.depth_locked = 1;
597 }
598}
599
600static void
601__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
602{
603    char const * const func = "omp_set_nest_lock";
604 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
605 KMP_FATAL( LockSimpleUsedAsNestable, func );
606    }
607 __kmp_acquire_nested_futex_lock( lck, gtid );
608}
609
610int
611__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
612{
613 int retval;
614
615 KMP_DEBUG_ASSERT( gtid >= 0 );
616
617 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
618 retval = ++lck->lk.depth_locked;
619 }
620 else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
621 retval = 0;
622 }
623 else {
624 KMP_MB();
625 retval = lck->lk.depth_locked = 1;
626 }
627 return retval;
628}
629
630static int
631__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
632{
633    char const * const func = "omp_test_nest_lock";
634 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
635 KMP_FATAL( LockSimpleUsedAsNestable, func );
636    }
637 return __kmp_test_nested_futex_lock( lck, gtid );
638}
639
640void
641__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
642{
643 KMP_DEBUG_ASSERT( gtid >= 0 );
644
645 KMP_MB();
646 if ( --(lck->lk.depth_locked) == 0 ) {
647 __kmp_release_futex_lock( lck, gtid );
648 }
649}
650
651static void
652__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653{
654    char const * const func = "omp_unset_nest_lock";
655 KMP_MB(); /* in case another processor initialized lock */
656 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
657 KMP_FATAL( LockSimpleUsedAsNestable, func );
658 }
659 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
660 KMP_FATAL( LockUnsettingFree, func );
661 }
662 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
663 KMP_FATAL( LockUnsettingSetByAnother, func );
664    }
665 __kmp_release_nested_futex_lock( lck, gtid );
666}
667
668void
669__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
670{
671 __kmp_init_futex_lock( lck );
672 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
673}
674
675static void
676__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
677{
678 __kmp_init_nested_futex_lock( lck );
679}
680
681void
682__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
683{
684 __kmp_destroy_futex_lock( lck );
685 lck->lk.depth_locked = 0;
686}
687
688static void
689__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
690{
691    char const * const func = "omp_destroy_nest_lock";
692 if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
693 KMP_FATAL( LockSimpleUsedAsNestable, func );
694 }
695 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
696 KMP_FATAL( LockStillOwned, func );
697    }
698 __kmp_destroy_nested_futex_lock( lck );
699}
700
701#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
702
703
704/* ------------------------------------------------------------------------ */
705/* ticket (bakery) locks */
706
707static kmp_int32
708__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
709{
710 return TCR_4( lck->lk.owner_id ) - 1;
711}
712
713static inline bool
714__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
715{
716 return lck->lk.depth_locked != -1;
717}
718
719static kmp_uint32
720__kmp_bakery_check(kmp_uint value, kmp_uint checker)
721{
722 register kmp_uint32 pause;
723
724 if (value == checker) {
725 return TRUE;
726 }
727    for (pause = checker - value; pause != 0; --pause);
728    return FALSE;
729}
730
731__forceinline static void
732__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
733{
734 kmp_uint32 my_ticket;
735 KMP_MB();
736
737 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
738
739#ifdef USE_LOCK_PROFILE
740 if ( TCR_4( lck->lk.now_serving ) != my_ticket )
741 __kmp_printf( "LOCK CONTENTION: %p\n", lck );
742 /* else __kmp_printf( "." );*/
743#endif /* USE_LOCK_PROFILE */
744
745 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
746 KMP_FSYNC_ACQUIRED(lck);
747 return;
748 }
749 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
750 KMP_FSYNC_ACQUIRED(lck);
751}
752
753void
754__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
755{
756 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
757}
758
759static void
760__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
761{
762    char const * const func = "omp_set_lock";
763 if ( lck->lk.initialized != lck ) {
764 KMP_FATAL( LockIsUninitialized, func );
765 }
766 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
767 KMP_FATAL( LockNestableUsedAsSimple, func );
768 }
769 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
770 KMP_FATAL( LockIsAlreadyOwned, func );
771    }
772
773 __kmp_acquire_ticket_lock( lck, gtid );
774
775    lck->lk.owner_id = gtid + 1;
776}
777
778int
779__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
780{
781 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
782 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
783 kmp_uint32 next_ticket = my_ticket + 1;
784 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
785 my_ticket, next_ticket ) ) {
786 KMP_FSYNC_ACQUIRED( lck );
787 return TRUE;
788 }
789 }
790 return FALSE;
791}
792
793static int
794__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
795{
796    char const * const func = "omp_test_lock";
797 if ( lck->lk.initialized != lck ) {
798 KMP_FATAL( LockIsUninitialized, func );
799 }
800 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
801 KMP_FATAL( LockNestableUsedAsSimple, func );
802    }
803
804 int retval = __kmp_test_ticket_lock( lck, gtid );
805
806    if ( retval ) {
807        lck->lk.owner_id = gtid + 1;
808 }
809 return retval;
810}
811
812void
813__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814{
815 kmp_uint32 distance;
816
817 KMP_MB(); /* Flush all pending memory write invalidates. */
818
819 KMP_FSYNC_RELEASING(lck);
820 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
821
822 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
823
824 KMP_MB(); /* Flush all pending memory write invalidates. */
825
826 KMP_YIELD( distance
827 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
828}
829
830static void
831__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
832{
833    char const * const func = "omp_unset_lock";
834 KMP_MB(); /* in case another processor initialized lock */
835 if ( lck->lk.initialized != lck ) {
836 KMP_FATAL( LockIsUninitialized, func );
837    }
838    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
839 KMP_FATAL( LockNestableUsedAsSimple, func );
840 }
841 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
842 KMP_FATAL( LockUnsettingFree, func );
843 }
844 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
845 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
846 KMP_FATAL( LockUnsettingSetByAnother, func );
847 }
848 lck->lk.owner_id = 0;
849    __kmp_release_ticket_lock( lck, gtid );
850}
851
852void
853__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
854{
855 lck->lk.location = NULL;
856 TCW_4( lck->lk.next_ticket, 0 );
857 TCW_4( lck->lk.now_serving, 0 );
858 lck->lk.owner_id = 0; // no thread owns the lock.
859 lck->lk.depth_locked = -1; // -1 => not a nested lock.
860 lck->lk.initialized = (kmp_ticket_lock *)lck;
861}
862
863static void
864__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
865{
866 __kmp_init_ticket_lock( lck );
867}
868
869void
870__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
871{
872 lck->lk.initialized = NULL;
873 lck->lk.location = NULL;
874 lck->lk.next_ticket = 0;
875 lck->lk.now_serving = 0;
876 lck->lk.owner_id = 0;
877 lck->lk.depth_locked = -1;
878}
879
880static void
881__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
882{
883    char const * const func = "omp_destroy_lock";
884 if ( lck->lk.initialized != lck ) {
885 KMP_FATAL( LockIsUninitialized, func );
886 }
887 if ( __kmp_is_ticket_lock_nestable( lck ) ) {
888 KMP_FATAL( LockNestableUsedAsSimple, func );
889 }
890 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
891 KMP_FATAL( LockStillOwned, func );
892    }
893 __kmp_destroy_ticket_lock( lck );
894}
895
896
897//
898// nested ticket locks
899//
900
901void
902__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
903{
904 KMP_DEBUG_ASSERT( gtid >= 0 );
905
906 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
907 lck->lk.depth_locked += 1;
908 }
909 else {
910 __kmp_acquire_ticket_lock_timed_template( lck, gtid );
911 KMP_MB();
912 lck->lk.depth_locked = 1;
913 KMP_MB();
914 lck->lk.owner_id = gtid + 1;
915 }
916}
917
918static void
919__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
920{
921    char const * const func = "omp_set_nest_lock";
922 if ( lck->lk.initialized != lck ) {
923 KMP_FATAL( LockIsUninitialized, func );
924 }
925 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
926 KMP_FATAL( LockSimpleUsedAsNestable, func );
927    }
928 __kmp_acquire_nested_ticket_lock( lck, gtid );
929}
930
931int
932__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
933{
934 int retval;
935
936 KMP_DEBUG_ASSERT( gtid >= 0 );
937
938 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
939 retval = ++lck->lk.depth_locked;
940 }
941 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
942 retval = 0;
943 }
944 else {
945 KMP_MB();
946 retval = lck->lk.depth_locked = 1;
947 KMP_MB();
948 lck->lk.owner_id = gtid + 1;
949 }
950 return retval;
951}
952
953static int
954__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
955 kmp_int32 gtid )
956{
957    char const * const func = "omp_test_nest_lock";
958 if ( lck->lk.initialized != lck ) {
959 KMP_FATAL( LockIsUninitialized, func );
960 }
961 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
962 KMP_FATAL( LockSimpleUsedAsNestable, func );
963    }
964 return __kmp_test_nested_ticket_lock( lck, gtid );
965}
966
967void
968__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
969{
970 KMP_DEBUG_ASSERT( gtid >= 0 );
971
972 KMP_MB();
973 if ( --(lck->lk.depth_locked) == 0 ) {
974 KMP_MB();
975 lck->lk.owner_id = 0;
976 __kmp_release_ticket_lock( lck, gtid );
977 }
978}
979
980static void
981__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
982{
983    char const * const func = "omp_unset_nest_lock";
984 KMP_MB(); /* in case another processor initialized lock */
985 if ( lck->lk.initialized != lck ) {
986 KMP_FATAL( LockIsUninitialized, func );
987 }
988 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
989 KMP_FATAL( LockSimpleUsedAsNestable, func );
990 }
991 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
992 KMP_FATAL( LockUnsettingFree, func );
993 }
994 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
995 KMP_FATAL( LockUnsettingSetByAnother, func );
996    }
997 __kmp_release_nested_ticket_lock( lck, gtid );
998}
999
1000void
1001__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1002{
1003 __kmp_init_ticket_lock( lck );
1004 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1005}
1006
1007static void
1008__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1009{
1010 __kmp_init_nested_ticket_lock( lck );
1011}
1012
1013void
1014__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1015{
1016 __kmp_destroy_ticket_lock( lck );
1017 lck->lk.depth_locked = 0;
1018}
1019
1020static void
1021__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1022{
1023    char const * const func = "omp_destroy_nest_lock";
1024 if ( lck->lk.initialized != lck ) {
1025 KMP_FATAL( LockIsUninitialized, func );
1026 }
1027 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1028 KMP_FATAL( LockSimpleUsedAsNestable, func );
1029 }
1030 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1031 KMP_FATAL( LockStillOwned, func );
1032    }
1033 __kmp_destroy_nested_ticket_lock( lck );
1034}
1035
1036
1037//
1038// access functions to fields which don't exist for all lock kinds.
1039//
1040
1041static int
1042__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1043{
1044 return lck == lck->lk.initialized;
1045}
1046
1047static const ident_t *
1048__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1049{
1050 return lck->lk.location;
1051}
1052
1053static void
1054__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1055{
1056 lck->lk.location = loc;
1057}
1058
1059static kmp_lock_flags_t
1060__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1061{
1062 return lck->lk.flags;
1063}
1064
1065static void
1066__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1067{
1068 lck->lk.flags = flags;
1069}
1070
1071/* ------------------------------------------------------------------------ */
1072/* queuing locks */
1073
1074/*
1075 * First the states
1076 * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1077 * UINT_MAX or -1, 0 means lock is held, nobody on queue
1078 * h, h means lock is held or about to transition, 1 element on queue
1079 * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1080 *
1081 * Now the transitions
1082 * Acquire(0,0) = -1 ,0
1083 * Release(0,0) = Error
1084 * Acquire(-1,0) = h ,h h > 0
1085 * Release(-1,0) = 0 ,0
1086 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1087 * Release(h,h) = -1 ,0 h > 0
1088 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1089 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1090 *
1091 * And pictorially
1092 *
1093 *
1094 * +-----+
1095 * | 0, 0|------- release -------> Error
1096 * +-----+
1097 * | ^
1098 * acquire| |release
1099 * | |
1100 * | |
1101 * v |
1102 * +-----+
1103 * |-1, 0|
1104 * +-----+
1105 * | ^
1106 * acquire| |release
1107 * | |
1108 * | |
1109 * v |
1110 * +-----+
1111 * | h, h|
1112 * +-----+
1113 * | ^
1114 * acquire| |release
1115 * | |
1116 * | |
1117 * v |
1118 * +-----+
1119 * | h, t|----- acquire, release loopback ---+
1120 * +-----+ |
1121 * ^ |
1122 * | |
1123 * +------------------------------------+
1124 *
1125 */
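/*
 * Worked example (illustrative only), following the transitions above, with
 * threads T1..T3 having gtids 0..2, so the queue stores gtid+1:
 *
 *   T1 acquires:          (0,0)  -> (-1,0)   T1 owns, queue empty
 *   T2 tries, enqueues:   (-1,0) -> ( 2,2)   T2 spins on its th_spin_here
 *   T3 tries, enqueues:   ( 2,2) -> ( 2,3)   T2's th_next_waiting = 3
 *   T1 releases:          ( 2,3) -> ( 3,3)   T2 is dequeued and now owns
 *   T2 releases:          ( 3,3) -> (-1,0)   T3 is dequeued and now owns
 *   T3 releases:          (-1,0) -> ( 0,0)   lock is free again
 */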
1126
1127#ifdef DEBUG_QUEUING_LOCKS
1128
1129/* Stuff for circular trace buffer */
1130#define TRACE_BUF_ELE 1024
1131static char traces[TRACE_BUF_ELE][128] = { 0 };
1132static int tc = 0;
1133#define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1134#define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1135#define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1136
1137static void
1138__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1139 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1140{
1141 kmp_int32 t, i;
1142
1143 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1144
1145 i = tc % TRACE_BUF_ELE;
1146 __kmp_printf_no_lock( "%s\n", traces[i] );
1147 i = (i+1) % TRACE_BUF_ELE;
1148 while ( i != (tc % TRACE_BUF_ELE) ) {
1149 __kmp_printf_no_lock( "%s", traces[i] );
1150 i = (i+1) % TRACE_BUF_ELE;
1151 }
1152 __kmp_printf_no_lock( "\n" );
1153
1154 __kmp_printf_no_lock(
1155 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1156 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1157 head_id, tail_id );
1158
1159 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1160
1161 if ( lck->lk.head_id >= 1 ) {
1162 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1163 while (t > 0) {
1164 __kmp_printf_no_lock( "-> %d ", t );
1165 t = __kmp_threads[t-1]->th.th_next_waiting;
1166 }
1167 }
1168 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1169 __kmp_printf_no_lock( "\n\n" );
1170}
1171
1172#endif /* DEBUG_QUEUING_LOCKS */
1173
1174static kmp_int32
1175__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1176{
1177 return TCR_4( lck->lk.owner_id ) - 1;
1178}
1179
1180static inline bool
1181__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1182{
1183 return lck->lk.depth_locked != -1;
1184}
1185
1186/* Acquire a lock using the queuing lock implementation */
1187template <bool takeTime>
1188/* [TLW] The unused template above is left behind because of what BEB believes is a
1189 potential compiler problem with __forceinline. */
1190__forceinline static void
1191__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1192 kmp_int32 gtid )
1193{
1194 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1195 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1196 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1197 volatile kmp_uint32 *spin_here_p;
1198 kmp_int32 need_mf = 1;
1199
1200 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1201
1202 KMP_FSYNC_PREPARE( lck );
1203 KMP_DEBUG_ASSERT( this_thr != NULL );
1204 spin_here_p = & this_thr->th.th_spin_here;
1205
1206#ifdef DEBUG_QUEUING_LOCKS
1207 TRACE_LOCK( gtid+1, "acq ent" );
1208 if ( *spin_here_p )
1209 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1210 if ( this_thr->th.th_next_waiting != 0 )
1211 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1212#endif
1213 KMP_DEBUG_ASSERT( !*spin_here_p );
1214 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1215
1216
1217 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1218 that may follow, not just in execution order, but also in visibility order. This way,
1219 when a releasing thread observes the changes to the queue by this thread, it can
1220 rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1221 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1222 to FALSE before this thread sets it to TRUE, this thread will hang.
1223 */
1224 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1225
1226 while( 1 ) {
1227 kmp_int32 enqueued;
1228 kmp_int32 head;
1229 kmp_int32 tail;
1230
1231 head = *head_id_p;
1232
1233 switch ( head ) {
1234
1235 case -1:
1236 {
1237#ifdef DEBUG_QUEUING_LOCKS
1238 tail = *tail_id_p;
1239 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1240#endif
1241 tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1242 this assignment prevents us from entering the if ( t > 0 )
1243 condition in the enqueued case below, which is not necessary for
1244 this state transition */
1245
1246 need_mf = 0;
1247 /* try (-1,0)->(tid,tid) */
1248 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1249 KMP_PACK_64( -1, 0 ),
1250 KMP_PACK_64( gtid+1, gtid+1 ) );
1251#ifdef DEBUG_QUEUING_LOCKS
1252 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1253#endif
1254 }
1255 break;
1256
1257 default:
1258 {
1259 tail = *tail_id_p;
1260 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1261
1262#ifdef DEBUG_QUEUING_LOCKS
1263 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1264#endif
1265
1266 if ( tail == 0 ) {
1267 enqueued = FALSE;
1268 }
1269 else {
1270 need_mf = 0;
1271 /* try (h,t) or (h,h)->(h,tid) */
1272 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1273
1274#ifdef DEBUG_QUEUING_LOCKS
1275 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1276#endif
1277 }
1278 }
1279 break;
1280
1281 case 0: /* empty queue */
1282 {
1283 kmp_int32 grabbed_lock;
1284
1285#ifdef DEBUG_QUEUING_LOCKS
1286 tail = *tail_id_p;
1287 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1288#endif
1289 /* try (0,0)->(-1,0) */
1290
1291 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1292 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1293
1294 if ( grabbed_lock ) {
1295
1296 *spin_here_p = FALSE;
1297
1298 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1299 lck, gtid ));
1300#ifdef DEBUG_QUEUING_LOCKS
1301 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1302#endif
1303 KMP_FSYNC_ACQUIRED( lck );
1304 return; /* lock holder cannot be on queue */
1305 }
1306 enqueued = FALSE;
1307 }
1308 break;
1309 }
1310
1311 if ( enqueued ) {
1312 if ( tail > 0 ) {
1313 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1314 KMP_ASSERT( tail_thr != NULL );
1315 tail_thr->th.th_next_waiting = gtid+1;
1316 /* corresponding wait for this write in release code */
1317 }
1318 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1319
1320
1321 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1322 * throughput only here.
1323 */
1324 KMP_MB();
1325 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1326
1327#ifdef DEBUG_QUEUING_LOCKS
1328 TRACE_LOCK( gtid+1, "acq spin" );
1329
1330 if ( this_thr->th.th_next_waiting != 0 )
1331 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1332#endif
1333 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1334 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1335 lck, gtid ));
1336
1337#ifdef DEBUG_QUEUING_LOCKS
1338 TRACE_LOCK( gtid+1, "acq exit 2" );
1339#endif
1340 /* got lock, we were dequeued by the thread that released lock */
1341 return;
1342 }
1343
1344 /* Yield if number of threads > number of logical processors */
1345 /* ToDo: Not sure why this should only be in oversubscription case,
1346 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1347 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1348 __kmp_xproc ) );
1349#ifdef DEBUG_QUEUING_LOCKS
1350 TRACE_LOCK( gtid+1, "acq retry" );
1351#endif
1352
1353 }
1354 KMP_ASSERT2( 0, "should not get here" );
1355}
1356
1357void
1358__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1359{
1360 KMP_DEBUG_ASSERT( gtid >= 0 );
1361
1362 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1363}
1364
1365static void
1366__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1367 kmp_int32 gtid )
1368{
1369    char const * const func = "omp_set_lock";
1370 if ( lck->lk.initialized != lck ) {
1371 KMP_FATAL( LockIsUninitialized, func );
1372 }
1373 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1374 KMP_FATAL( LockNestableUsedAsSimple, func );
1375 }
1376 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1377 KMP_FATAL( LockIsAlreadyOwned, func );
1378    }
1379
1380 __kmp_acquire_queuing_lock( lck, gtid );
1381
1382    lck->lk.owner_id = gtid + 1;
1383}
1384
1385int
1386__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1387{
1388 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1389 kmp_int32 head;
1390#ifdef KMP_DEBUG
1391 kmp_info_t *this_thr;
1392#endif
1393
1394 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1395 KMP_DEBUG_ASSERT( gtid >= 0 );
1396#ifdef KMP_DEBUG
1397 this_thr = __kmp_thread_from_gtid( gtid );
1398 KMP_DEBUG_ASSERT( this_thr != NULL );
1399 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1400#endif
1401
1402 head = *head_id_p;
1403
1404 if ( head == 0 ) { /* nobody on queue, nobody holding */
1405
1406 /* try (0,0)->(-1,0) */
1407
1408 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1409 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1410 KMP_FSYNC_ACQUIRED(lck);
1411 return TRUE;
1412 }
1413 }
1414
1415 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1416 return FALSE;
1417}
1418
1419static int
1420__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1421{
1422    char const * const func = "omp_test_lock";
1423 if ( lck->lk.initialized != lck ) {
1424 KMP_FATAL( LockIsUninitialized, func );
1425 }
1426 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1427 KMP_FATAL( LockNestableUsedAsSimple, func );
1428    }
1429
1430 int retval = __kmp_test_queuing_lock( lck, gtid );
1431
1432    if ( retval ) {
1433        lck->lk.owner_id = gtid + 1;
1434 }
1435 return retval;
1436}
1437
1438void
1439__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1440{
1441 register kmp_info_t *this_thr;
1442 volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1443 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1444
1445 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1446 KMP_DEBUG_ASSERT( gtid >= 0 );
1447 this_thr = __kmp_thread_from_gtid( gtid );
1448 KMP_DEBUG_ASSERT( this_thr != NULL );
1449#ifdef DEBUG_QUEUING_LOCKS
1450 TRACE_LOCK( gtid+1, "rel ent" );
1451
1452 if ( this_thr->th.th_spin_here )
1453 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1454 if ( this_thr->th.th_next_waiting != 0 )
1455 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1456#endif
1457 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1458 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1459
1460 KMP_FSYNC_RELEASING(lck);
1461
1462 while( 1 ) {
1463 kmp_int32 dequeued;
1464 kmp_int32 head;
1465 kmp_int32 tail;
1466
1467 head = *head_id_p;
1468
1469#ifdef DEBUG_QUEUING_LOCKS
1470 tail = *tail_id_p;
1471 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1472 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1473#endif
1474 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1475
1476 if ( head == -1 ) { /* nobody on queue */
1477
1478 /* try (-1,0)->(0,0) */
1479 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1480 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1481 lck, gtid ));
1482#ifdef DEBUG_QUEUING_LOCKS
1483 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1484#endif
1485 return;
1486 }
1487 dequeued = FALSE;
1488
1489 }
1490 else {
1491
1492 tail = *tail_id_p;
1493 if ( head == tail ) { /* only one thread on the queue */
1494
1495#ifdef DEBUG_QUEUING_LOCKS
1496 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1497#endif
1498 KMP_DEBUG_ASSERT( head > 0 );
1499
1500 /* try (h,h)->(-1,0) */
1501 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1502 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1503#ifdef DEBUG_QUEUING_LOCKS
1504 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1505#endif
1506
1507 }
1508 else {
1509 volatile kmp_int32 *waiting_id_p;
1510 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1511 KMP_DEBUG_ASSERT( head_thr != NULL );
1512 waiting_id_p = & head_thr->th.th_next_waiting;
1513
1514 /* Does this require synchronous reads? */
1515#ifdef DEBUG_QUEUING_LOCKS
1516 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1517#endif
1518 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1519
1520 /* try (h,t)->(h',t) or (t,t) */
1521
1522 KMP_MB();
1523 /* make sure enqueuing thread has time to update next waiting thread field */
1524 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1525#ifdef DEBUG_QUEUING_LOCKS
1526 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1527#endif
1528 dequeued = TRUE;
1529 }
1530 }
1531
1532 if ( dequeued ) {
1533 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1534 KMP_DEBUG_ASSERT( head_thr != NULL );
1535
1536 /* Does this require synchronous reads? */
1537#ifdef DEBUG_QUEUING_LOCKS
1538 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1539#endif
1540 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1541
1542 /* For clean code only.
1543 * Thread not released until next statement prevents race with acquire code.
1544 */
1545 head_thr->th.th_next_waiting = 0;
1546#ifdef DEBUG_QUEUING_LOCKS
1547 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1548#endif
1549
1550 KMP_MB();
1551 /* reset spin value */
1552 head_thr->th.th_spin_here = FALSE;
1553
1554 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1555 lck, gtid ));
1556#ifdef DEBUG_QUEUING_LOCKS
1557 TRACE_LOCK( gtid+1, "rel exit 2" );
1558#endif
1559 return;
1560 }
1561 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1562
1563#ifdef DEBUG_QUEUING_LOCKS
1564 TRACE_LOCK( gtid+1, "rel retry" );
1565#endif
1566
1567 } /* while */
1568 KMP_ASSERT2( 0, "should not get here" );
1569}
1570
1571static void
1572__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1573 kmp_int32 gtid )
1574{
1575    char const * const func = "omp_unset_lock";
1576 KMP_MB(); /* in case another processor initialized lock */
1577 if ( lck->lk.initialized != lck ) {
1578 KMP_FATAL( LockIsUninitialized, func );
1579    }
1580    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1581 KMP_FATAL( LockNestableUsedAsSimple, func );
1582 }
1583 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1584 KMP_FATAL( LockUnsettingFree, func );
1585 }
1586 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1587 KMP_FATAL( LockUnsettingSetByAnother, func );
1588 }
1589 lck->lk.owner_id = 0;
1590    __kmp_release_queuing_lock( lck, gtid );
1591}
1592
1593void
1594__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1595{
1596 lck->lk.location = NULL;
1597 lck->lk.head_id = 0;
1598 lck->lk.tail_id = 0;
1599 lck->lk.next_ticket = 0;
1600 lck->lk.now_serving = 0;
1601 lck->lk.owner_id = 0; // no thread owns the lock.
1602 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1603 lck->lk.initialized = lck;
1604
1605 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1606}
1607
1608static void
1609__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1610{
1611 __kmp_init_queuing_lock( lck );
1612}
1613
1614void
1615__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1616{
1617 lck->lk.initialized = NULL;
1618 lck->lk.location = NULL;
1619 lck->lk.head_id = 0;
1620 lck->lk.tail_id = 0;
1621 lck->lk.next_ticket = 0;
1622 lck->lk.now_serving = 0;
1623 lck->lk.owner_id = 0;
1624 lck->lk.depth_locked = -1;
1625}
1626
1627static void
1628__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1629{
1630    char const * const func = "omp_destroy_lock";
1631 if ( lck->lk.initialized != lck ) {
1632 KMP_FATAL( LockIsUninitialized, func );
1633 }
1634 if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1635 KMP_FATAL( LockNestableUsedAsSimple, func );
1636 }
1637 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1638 KMP_FATAL( LockStillOwned, func );
1639    }
1640 __kmp_destroy_queuing_lock( lck );
1641}
1642
1643
1644//
1645// nested queuing locks
1646//
1647
1648void
1649__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1650{
1651 KMP_DEBUG_ASSERT( gtid >= 0 );
1652
1653 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1654 lck->lk.depth_locked += 1;
1655 }
1656 else {
1657 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1658 KMP_MB();
1659 lck->lk.depth_locked = 1;
1660 KMP_MB();
1661 lck->lk.owner_id = gtid + 1;
1662 }
1663}
1664
1665static void
1666__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1667{
1668    char const * const func = "omp_set_nest_lock";
1669 if ( lck->lk.initialized != lck ) {
1670 KMP_FATAL( LockIsUninitialized, func );
1671 }
1672 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1673 KMP_FATAL( LockSimpleUsedAsNestable, func );
1674    }
1675 __kmp_acquire_nested_queuing_lock( lck, gtid );
1676}
1677
1678int
1679__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1680{
1681 int retval;
1682
1683 KMP_DEBUG_ASSERT( gtid >= 0 );
1684
1685 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1686 retval = ++lck->lk.depth_locked;
1687 }
1688 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1689 retval = 0;
1690 }
1691 else {
1692 KMP_MB();
1693 retval = lck->lk.depth_locked = 1;
1694 KMP_MB();
1695 lck->lk.owner_id = gtid + 1;
1696 }
1697 return retval;
1698}
1699
1700static int
1701__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1702 kmp_int32 gtid )
1703{
1704    char const * const func = "omp_test_nest_lock";
1705 if ( lck->lk.initialized != lck ) {
1706 KMP_FATAL( LockIsUninitialized, func );
1707 }
1708 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1709 KMP_FATAL( LockSimpleUsedAsNestable, func );
1710    }
1711 return __kmp_test_nested_queuing_lock( lck, gtid );
1712}
1713
1714void
1715__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716{
1717 KMP_DEBUG_ASSERT( gtid >= 0 );
1718
1719 KMP_MB();
1720 if ( --(lck->lk.depth_locked) == 0 ) {
1721 KMP_MB();
1722 lck->lk.owner_id = 0;
1723 __kmp_release_queuing_lock( lck, gtid );
1724 }
1725}
1726
1727static void
1728__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1729{
1730    char const * const func = "omp_unset_nest_lock";
1731 KMP_MB(); /* in case another processor initialized lock */
1732 if ( lck->lk.initialized != lck ) {
1733 KMP_FATAL( LockIsUninitialized, func );
1734 }
1735 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1736 KMP_FATAL( LockSimpleUsedAsNestable, func );
1737 }
1738 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1739 KMP_FATAL( LockUnsettingFree, func );
1740 }
1741 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1742 KMP_FATAL( LockUnsettingSetByAnother, func );
1743    }
1744 __kmp_release_nested_queuing_lock( lck, gtid );
1745}
1746
1747void
1748__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1749{
1750 __kmp_init_queuing_lock( lck );
1751 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1752}
1753
1754static void
1755__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1756{
1757 __kmp_init_nested_queuing_lock( lck );
1758}
1759
1760void
1761__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1762{
1763 __kmp_destroy_queuing_lock( lck );
1764 lck->lk.depth_locked = 0;
1765}
1766
1767static void
1768__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1769{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001770 char const * const func = "omp_destroy_nest_lock";
1771 if ( lck->lk.initialized != lck ) {
1772 KMP_FATAL( LockIsUninitialized, func );
1773 }
1774 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1775 KMP_FATAL( LockSimpleUsedAsNestable, func );
1776 }
1777 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1778 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00001779 }
1780 __kmp_destroy_nested_queuing_lock( lck );
1781}
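//
// Illustrative sketch (disabled; not part of the runtime build): how the
// depth counting implemented by the nested queuing lock maps onto the
// public OpenMP nested lock API.  Which internal lock kind omp_*_nest_lock
// actually dispatches to depends on KMP_LOCK_KIND, so this only shows the
// acquire/release pairing, not a guarantee that the queuing lock is used.
//
#if 0
#include <omp.h>

static void __kmp_example_nested_lock_usage( void )
{
    omp_nest_lock_t l;
    omp_init_nest_lock( &l );

    omp_set_nest_lock( &l );    // first acquire: depth becomes 1, owner recorded
    omp_set_nest_lock( &l );    // same thread re-acquires: depth becomes 2
    omp_unset_nest_lock( &l );  // depth back to 1, lock still owned
    omp_unset_nest_lock( &l );  // depth reaches 0, underlying lock is released

    omp_destroy_nest_lock( &l );
}
#endif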
1782
1783
1784//
1785// access functions to fields which don't exist for all lock kinds.
1786//
1787
1788static int
1789__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1790{
1791 return lck == lck->lk.initialized;
1792}
1793
1794static const ident_t *
1795__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1796{
1797 return lck->lk.location;
1798}
1799
1800static void
1801__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1802{
1803 lck->lk.location = loc;
1804}
1805
1806static kmp_lock_flags_t
1807__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1808{
1809 return lck->lk.flags;
1810}
1811
1812static void
1813__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1814{
1815 lck->lk.flags = flags;
1816}
1817
1818#if KMP_USE_ADAPTIVE_LOCKS
1819
1820/*
1821 RTM Adaptive locks
1822*/
1823
1824// TODO: use the <immintrin.h> intrinsics (commented out below) once compiler 13.0 can be assumed.
1825//#include <immintrin.h>
1826
1827// Values from the status register after failed speculation.
1828#define _XBEGIN_STARTED (~0u)
1829#define _XABORT_EXPLICIT (1 << 0)
1830#define _XABORT_RETRY (1 << 1)
1831#define _XABORT_CONFLICT (1 << 2)
1832#define _XABORT_CAPACITY (1 << 3)
1833#define _XABORT_DEBUG (1 << 4)
1834#define _XABORT_NESTED (1 << 5)
1835#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1836
1837// Aborts for which it's worth trying again immediately
1838#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1839
1840#define STRINGIZE_INTERNAL(arg) #arg
1841#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1842
1843// Access to RTM instructions
1844
1845/*
1846 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1847 This is the same definition as the compiler intrinsic that will be supported at some point.
1848*/
1849static __inline int _xbegin()
1850{
1851 int res = -1;
1852
1853#if KMP_OS_WINDOWS
1854#if KMP_ARCH_X86_64
1855 _asm {
1856 _emit 0xC7
1857 _emit 0xF8
1858 _emit 2
1859 _emit 0
1860 _emit 0
1861 _emit 0
1862 jmp L2
1863 mov res, eax
1864 L2:
1865 }
1866#else /* IA32 */
1867 _asm {
1868 _emit 0xC7
1869 _emit 0xF8
1870 _emit 2
1871 _emit 0
1872 _emit 0
1873 _emit 0
1874 jmp L2
1875 mov res, eax
1876 L2:
1877 }
1878#endif // KMP_ARCH_X86_64
1879#else
1880 /* Note that %eax must be noted as killed (clobbered), because
1881 * the XSR is returned in %eax(%rax) on abort. Other register
1882 * values are restored, so don't need to be killed.
1883 *
1884 * We must also mark 'res' as an input and an output, since otherwise
1885 * 'res=-1' may be dropped as being dead, whereas we do need the
1886 * assignment on the successful (i.e., non-abort) path.
1887 */
1888 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1889 " .long 1f-1b-6\n"
1890 " jmp 2f\n"
1891 "1: movl %%eax,%0\n"
1892 "2:"
1893 :"+r"(res)::"memory","%eax");
1894#endif // KMP_OS_WINDOWS
1895 return res;
1896}
1897
1898/*
1899 Transaction end
1900*/
1901static __inline void _xend()
1902{
1903#if KMP_OS_WINDOWS
1904 __asm {
1905 _emit 0x0f
1906 _emit 0x01
1907 _emit 0xd5
1908 }
1909#else
1910 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1911#endif
1912}
1913
1914/*
1915 This is a macro, the argument must be a single byte constant which
1916 can be evaluated by the inline assembler, since it is emitted as a
1917 byte into the assembly code.
1918*/
1919#if KMP_OS_WINDOWS
1920#define _xabort(ARG) \
1921 _asm _emit 0xc6 \
1922 _asm _emit 0xf8 \
1923 _asm _emit ARG
1924#else
1925#define _xabort(ARG) \
1926 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1927#endif
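//
// Illustrative sketch (disabled): the canonical retry pattern for the
// _xbegin/_xend/_xabort wrappers defined above, with the policy reduced
// to its essentials.  The adaptive lock code below implements the real
// version of this loop; this block exists only to show how the status
// word and SOFT_ABORT_MASK are meant to be used.
//
#if 0
static int __kmp_example_speculate( int max_retries )
{
    int retries = max_retries;
    do {
        kmp_uint32 status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            // We are now executing transactionally; _xend() commits,
            // while any conflict or an explicit _xabort() rolls back
            // to the _xbegin() above with an abort status in 'status'.
            _xend();
            return 1;               // committed speculatively
        }
        // Aborted: only retry for "soft" aborts (retry/conflict/explicit).
        if ( ! ( status & SOFT_ABORT_MASK ) )
            break;                  // hard failure (e.g. capacity): give up
    } while ( retries-- );
    return 0;                       // caller should fall back to a real lock
}
#endif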
1928
1929//
1930// Statistics are collected for testing purposes.
1931//
1932#if KMP_DEBUG_ADAPTIVE_LOCKS
1933
1934// We accumulate speculative lock statistics when the lock is destroyed.
1935// We keep locks that haven't been destroyed in the liveLocks list
1936// so that we can grab their statistics too.
1937static kmp_adaptive_lock_statistics_t destroyedStats;
1938
1939// To hold the list of live locks.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001940static kmp_adaptive_lock_info_t liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001941
1942// A lock so we can safely update the list of locks.
1943static kmp_bootstrap_lock_t chain_lock;
1944
1945// Initialize the list of stats.
1946void
1947__kmp_init_speculative_stats()
1948{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001949 kmp_adaptive_lock_info_t *lck = &liveLocks;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001950
1951 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1952 lck->stats.next = lck;
1953 lck->stats.prev = lck;
1954
1955 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1956 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1957
1958 __kmp_init_bootstrap_lock( &chain_lock );
1959
1960}
1961
1962// Insert the lock into the circular list
1963static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001964__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001965{
1966 __kmp_acquire_bootstrap_lock( &chain_lock );
1967
1968 lck->stats.next = liveLocks.stats.next;
1969 lck->stats.prev = &liveLocks;
1970
1971 liveLocks.stats.next = lck;
1972 lck->stats.next->stats.prev = lck;
1973
1974 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1975 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1976
1977 __kmp_release_bootstrap_lock( &chain_lock );
1978}
1979
1980static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001981__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982{
1983 KMP_ASSERT( lck->stats.next->stats.prev == lck );
1984 KMP_ASSERT( lck->stats.prev->stats.next == lck );
1985
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001986 kmp_adaptive_lock_info_t * n = lck->stats.next;
1987 kmp_adaptive_lock_info_t * p = lck->stats.prev;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001988
1989 n->stats.prev = p;
1990 p->stats.next = n;
1991}
1992
1993static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001994__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00001995{
1996 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
1997 __kmp_remember_lock( lck );
1998}
1999
2000static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002001__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002{
2003 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2004
2005 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2006 t->successfulSpeculations += s->successfulSpeculations;
2007 t->hardFailedSpeculations += s->hardFailedSpeculations;
2008 t->softFailedSpeculations += s->softFailedSpeculations;
2009 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2010 t->lemmingYields += s->lemmingYields;
2011}
2012
2013static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002014__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
Jim Cownie5e8470a2013-09-27 10:38:44 +00002015{
2016 kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2017
2018 __kmp_acquire_bootstrap_lock( &chain_lock );
2019
2020 __kmp_add_stats( &destroyedStats, lck );
2021 __kmp_forget_lock( lck );
2022
2023 __kmp_release_bootstrap_lock( &chain_lock );
2024}
2025
2026static float
2027percent (kmp_uint32 count, kmp_uint32 total)
2028{
2029    return (total == 0) ? 0.0 : (100.0 * count) / total;
2030}
2031
2032static
2033FILE * __kmp_open_stats_file()
2034{
2035 if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2036 return stdout;
2037
2038 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2039 char buffer[buffLen];
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002040 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2041 (kmp_int32)getpid());
Jim Cownie5e8470a2013-09-27 10:38:44 +00002042 FILE * result = fopen(&buffer[0], "w");
2043
2044 // Maybe we should issue a warning here...
2045 return result ? result : stdout;
2046}
2047
2048void
2049__kmp_print_speculative_stats()
2050{
2051 if (__kmp_user_lock_kind != lk_adaptive)
2052 return;
2053
2054 FILE * statsFile = __kmp_open_stats_file();
2055
2056 kmp_adaptive_lock_statistics_t total = destroyedStats;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002057 kmp_adaptive_lock_info_t *lck;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002058
2059 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2060 __kmp_add_stats( &total, lck );
2061 }
2062 kmp_adaptive_lock_statistics_t *t = &total;
2063 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2064 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2065 t->softFailedSpeculations;
2066
2067 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2068 fprintf ( statsFile, " Lock parameters: \n"
2069 " max_soft_retries : %10d\n"
2070 " max_badness : %10d\n",
2071 __kmp_adaptive_backoff_params.max_soft_retries,
2072 __kmp_adaptive_backoff_params.max_badness);
2073 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2074 fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2075 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2076 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2077 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2078 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2079 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2080
2081 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2082 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2083 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2084 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2085 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2086 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2087 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2088
2089 if (statsFile != stdout)
2090 fclose( statsFile );
2091}
2092
2093# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2094#else
2095# define KMP_INC_STAT(lck,stat)
2096
2097#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2098
2099static inline bool
2100__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2101{
2102 // It is enough to check that the head_id is zero.
2103 // We don't also need to check the tail.
2104 bool res = lck->lk.head_id == 0;
2105
2106 // We need a fence here, since we must ensure that no memory operations
2107 // from later in this thread float above that read.
Jim Cownie181b4bb2013-12-23 17:28:57 +00002108#if KMP_COMPILER_ICC
Jim Cownie5e8470a2013-09-27 10:38:44 +00002109 _mm_mfence();
Jim Cownie181b4bb2013-12-23 17:28:57 +00002110#else
2111 __sync_synchronize();
Jim Cownie5e8470a2013-09-27 10:38:44 +00002112#endif
2113
2114 return res;
2115}
2116
2117// Functions for manipulating the badness
2118static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002119__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002120{
2121 // Reset the badness to zero so we eagerly try to speculate again
2122 lck->lk.adaptive.badness = 0;
2123 KMP_INC_STAT(lck,successfulSpeculations);
2124}
2125
2126// Create a bit mask with one more set bit.
2127static __inline void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002128__kmp_step_badness( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002129{
2130 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2131 if ( newBadness > lck->lk.adaptive.max_badness) {
2132 return;
2133 } else {
2134 lck->lk.adaptive.badness = newBadness;
2135 }
2136}
2137
2138// Check whether speculation should be attempted.
2139static __inline int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002140__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002141{
2142 kmp_uint32 badness = lck->lk.adaptive.badness;
2143    kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2144 int res = (attempts & badness) == 0;
2145 return res;
2146}
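//
// Illustrative sketch (disabled): the arithmetic behind the badness mask.
// __kmp_step_badness grows the mask 0 -> 1 -> 3 -> 7 -> ... (capped at
// max_badness), and __kmp_should_speculate above then permits speculation
// only when (acquire_attempts & badness) == 0, i.e. on every 2nd, 4th,
// 8th, ... non-speculative acquire attempt as failures accumulate.
//
#if 0
static void __kmp_example_badness_gating( void )
{
    kmp_uint32 badness = 0;                     // fresh lock, or just after a success
    badness = ( badness << 1 ) | 1;             // one failed round:  1
    badness = ( badness << 1 ) | 1;             // two failed rounds: 3

    for ( kmp_uint32 attempts = 0; attempts < 8; ++attempts ) {
        int may_speculate = ( ( attempts & badness ) == 0 );
        // With badness == 3, only attempts 0 and 4 may speculate; the
        // rest go straight to the underlying queuing lock.
        (void) may_speculate;
    }
}
#endif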
2147
2148// Attempt to acquire only the speculative lock.
2149// Does not back off to the non-speculative lock.
2150//
2151static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002152__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002153{
2154 int retries = lck->lk.adaptive.max_soft_retries;
2155
2156    // We don't explicitly count the start of speculation; rather, we record
2157 // the results (success, hard fail, soft fail). The sum of all of those
2158 // is the total number of times we started speculation since all
2159 // speculations must end one of those ways.
2160 do
2161 {
2162 kmp_uint32 status = _xbegin();
2163 // Switch this in to disable actual speculation but exercise
2164 // at least some of the rest of the code. Useful for debugging...
2165 // kmp_uint32 status = _XABORT_NESTED;
2166
2167 if (status == _XBEGIN_STARTED )
2168 { /* We have successfully started speculation
2169 * Check that no-one acquired the lock for real between when we last looked
2170 * and now. This also gets the lock cache line into our read-set,
2171 * which we need so that we'll abort if anyone later claims it for real.
2172 */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002173 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002174 {
2175 // Lock is now visibly acquired, so someone beat us to it.
2176 // Abort the transaction so we'll restart from _xbegin with the
2177 // failure status.
2178 _xabort(0x01)
2179 KMP_ASSERT2( 0, "should not get here" );
2180 }
2181 return 1; // Lock has been acquired (speculatively)
2182 } else {
2183 // We have aborted, update the statistics
2184 if ( status & SOFT_ABORT_MASK)
2185 {
2186 KMP_INC_STAT(lck,softFailedSpeculations);
2187 // and loop round to retry.
2188 }
2189 else
2190 {
2191 KMP_INC_STAT(lck,hardFailedSpeculations);
2192 // Give up if we had a hard failure.
2193 break;
2194 }
2195 }
2196 } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2197
2198 // Either we had a hard failure or we didn't succeed softly after
2199 // the full set of attempts, so back off the badness.
2200 __kmp_step_badness( lck );
2201 return 0;
2202}
2203
2204// Attempt to acquire the speculative lock, or back off to the non-speculative one
2205// if the speculative lock cannot be acquired.
2206// We can succeed speculatively, non-speculatively, or fail.
2207static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002208__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002209{
2210 // First try to acquire the lock speculatively
2211 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2212 return 1;
2213
2214 // Speculative acquisition failed, so try to acquire it non-speculatively.
2215 // Count the non-speculative acquire attempt
2216 lck->lk.adaptive.acquire_attempts++;
2217
2218 // Use base, non-speculative lock.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002219 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002220 {
2221 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2222 return 1; // Lock is acquired (non-speculatively)
2223 }
2224 else
2225 {
2226 return 0; // Failed to acquire the lock, it's already visibly locked.
2227 }
2228}
2229
2230static int
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002231__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002232{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002233 char const * const func = "omp_test_lock";
2234 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2235 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002236 }
2237
2238 int retval = __kmp_test_adaptive_lock( lck, gtid );
2239
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002240 if ( retval ) {
2241 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002242 }
2243 return retval;
2244}
2245
2246// Block until we can acquire a speculative, adaptive lock.
2247// We check whether we should be trying to speculate.
2248// If we should be, we check the real lock to see if it is free,
2249// and, if not, pause without attempting to acquire it until it is.
2250// Then we try the speculative acquire.
2251// This means that although we suffer from lemmings a little (because
2252// we can't acquire the lock speculatively until the queue of waiting
2253// threads has cleared), we don't get into a state where we can never
2254// acquire the lock speculatively (because we force the queue to clear
2255// by preventing new arrivals from entering it).
2256//
2257// This does mean that when we're trying to break lemmings, the lock
2258// is no longer fair. However OpenMP makes no guarantee that its
2259// locks are fair, so this isn't a real problem.
2260static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002261__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002262{
2263 if ( __kmp_should_speculate( lck, gtid ) )
2264 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002265 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002266 {
2267 if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2268 return;
2269 // We tried speculation and failed, so give up.
2270 }
2271 else
2272 {
2273 // We can't try speculation until the lock is free, so we
2274            // pause here (without suspending on the queuing lock)
2275            // to allow it to drain, then try again.
2276 // All other threads will also see the same result for
2277 // shouldSpeculate, so will be doing the same if they
2278 // try to claim the lock from now on.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002279 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002280 {
2281 KMP_INC_STAT(lck,lemmingYields);
2282 __kmp_yield (TRUE);
2283 }
2284
2285 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2286 return;
2287 }
2288 }
2289
2290 // Speculative acquisition failed, so acquire it non-speculatively.
2291 // Count the non-speculative acquire attempt
2292 lck->lk.adaptive.acquire_attempts++;
2293
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002294 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002295 // We have acquired the base lock, so count that.
2296 KMP_INC_STAT(lck,nonSpeculativeAcquires );
2297}
2298
2299static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002300__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002301{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002302 char const * const func = "omp_set_lock";
2303 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2304 KMP_FATAL( LockIsUninitialized, func );
2305 }
2306 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2307 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002308 }
2309
2310 __kmp_acquire_adaptive_lock( lck, gtid );
2311
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002312 lck->lk.qlk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002313}
2314
2315static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002316__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002317{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002318 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002319 { // If the lock doesn't look claimed we must be speculating.
2320 // (Or the user's code is buggy and they're releasing without locking;
2321 // if we had XTEST we'd be able to check that case...)
2322 _xend(); // Exit speculation
2323 __kmp_update_badness_after_success( lck );
2324 }
2325 else
2326 { // Since the lock *is* visibly locked we're not speculating,
2327 // so should use the underlying lock's release scheme.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002328 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002329 }
2330}
2331
2332static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002333__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002334{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002335 char const * const func = "omp_unset_lock";
2336 KMP_MB(); /* in case another processor initialized lock */
2337 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2338 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002339 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002340 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2341 KMP_FATAL( LockUnsettingFree, func );
2342 }
2343 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2344 KMP_FATAL( LockUnsettingSetByAnother, func );
2345 }
2346 lck->lk.qlk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002347 __kmp_release_adaptive_lock( lck, gtid );
2348}
2349
2350static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002351__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002352{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002353 __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002354 lck->lk.adaptive.badness = 0;
2355 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2356 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2357 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2358#if KMP_DEBUG_ADAPTIVE_LOCKS
2359 __kmp_zero_speculative_stats( &lck->lk.adaptive );
2360#endif
2361 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2362}
2363
2364static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002365__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002366{
2367 __kmp_init_adaptive_lock( lck );
2368}
2369
2370static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002371__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002372{
2373#if KMP_DEBUG_ADAPTIVE_LOCKS
2374 __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2375#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002376 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002377 // Nothing needed for the speculative part.
2378}
2379
2380static void
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002381__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
Jim Cownie5e8470a2013-09-27 10:38:44 +00002382{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002383 char const * const func = "omp_destroy_lock";
2384 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2385 KMP_FATAL( LockIsUninitialized, func );
2386 }
2387 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2388 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002389 }
2390 __kmp_destroy_adaptive_lock( lck );
2391}
2392
2393
2394#endif // KMP_USE_ADAPTIVE_LOCKS
2395
2396
2397/* ------------------------------------------------------------------------ */
2398/* DRDPA ticket locks */
2399/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2400
2401static kmp_int32
2402__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2403{
2404 return TCR_4( lck->lk.owner_id ) - 1;
2405}
2406
2407static inline bool
2408__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2409{
2410 return lck->lk.depth_locked != -1;
2411}
2412
2413__forceinline static void
2414__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2415{
2416 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2417 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2418 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2419 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2420 TCR_PTR(lck->lk.polls); // volatile load
2421
2422#ifdef USE_LOCK_PROFILE
2423 if (TCR_8(polls[ticket & mask].poll) != ticket)
2424 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2425 /* else __kmp_printf( "." );*/
2426#endif /* USE_LOCK_PROFILE */
2427
2428 //
2429 // Now spin-wait, but reload the polls pointer and mask, in case the
2430 // polling area has been reconfigured. Unless it is reconfigured, the
2431 // reloads stay in L1 cache and are cheap.
2432 //
2433 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2434 //
2435 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2436 // and poll to be re-read every spin iteration.
2437 //
2438 kmp_uint32 spins;
2439
2440 KMP_FSYNC_PREPARE(lck);
2441 KMP_INIT_YIELD(spins);
2442 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
Jim Cownie5e8470a2013-09-27 10:38:44 +00002443 // If we are oversubscribed,
Alp Toker8f2d3f02014-02-24 10:40:15 +00002444 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
Jim Cownie5e8470a2013-09-27 10:38:44 +00002445 // CPU Pause is in the macros for yield.
2446 //
2447 KMP_YIELD(TCR_4(__kmp_nth)
2448 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2449 KMP_YIELD_SPIN(spins);
2450
2451 // Re-read the mask and the poll pointer from the lock structure.
2452 //
2453 // Make certain that "mask" is read before "polls" !!!
2454 //
2455        // If another thread reconfigures the polling area and updates both
2456        // values, and we read the new value of mask but the old polls
2457 // pointer, we could access memory beyond the end of the old polling
2458 // area.
2459 //
2460 mask = TCR_8(lck->lk.mask); // volatile load
2461 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2462 TCR_PTR(lck->lk.polls); // volatile load
2463 }
2464
2465 //
2466 // Critical section starts here
2467 //
2468 KMP_FSYNC_ACQUIRED(lck);
2469 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2470 ticket, lck));
2471 lck->lk.now_serving = ticket; // non-volatile store
2472
2473 //
2474 // Deallocate a garbage polling area if we know that we are the last
2475 // thread that could possibly access it.
2476 //
2477 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2478 // ticket.
2479 //
2480 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2481 __kmp_free((void *)lck->lk.old_polls);
2482 lck->lk.old_polls = NULL;
2483 lck->lk.cleanup_ticket = 0;
2484 }
2485
2486 //
2487 // Check to see if we should reconfigure the polling area.
2488 // If there is still a garbage polling area to be deallocated from a
2489 // previous reconfiguration, let a later thread reconfigure it.
2490 //
2491 if (lck->lk.old_polls == NULL) {
2492 bool reconfigure = false;
2493 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2494 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2495
2496 if (TCR_4(__kmp_nth)
2497 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2498 //
2499 // We are in oversubscription mode. Contract the polling area
2500 // down to a single location, if that hasn't been done already.
2501 //
2502 if (num_polls > 1) {
2503 reconfigure = true;
2504 num_polls = TCR_4(lck->lk.num_polls);
2505 mask = 0;
2506 num_polls = 1;
2507 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2508 __kmp_allocate(num_polls * sizeof(*polls));
2509 polls[0].poll = ticket;
2510 }
2511 }
2512 else {
2513 //
2514 // We are in under/fully subscribed mode. Check the number of
2515 // threads waiting on the lock. The size of the polling area
2516 // should be at least the number of threads waiting.
2517 //
2518 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2519 if (num_waiting > num_polls) {
2520 kmp_uint32 old_num_polls = num_polls;
2521 reconfigure = true;
2522 do {
2523 mask = (mask << 1) | 1;
2524 num_polls *= 2;
2525 } while (num_polls <= num_waiting);
2526
2527 //
2528 // Allocate the new polling area, and copy the relevant portion
2529 // of the old polling area to the new area. __kmp_allocate()
2530 // zeroes the memory it allocates, and most of the old area is
2531 // just zero padding, so we only copy the release counters.
2532 //
2533 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2534 __kmp_allocate(num_polls * sizeof(*polls));
2535 kmp_uint32 i;
2536 for (i = 0; i < old_num_polls; i++) {
2537 polls[i].poll = old_polls[i].poll;
2538 }
2539 }
2540 }
2541
2542 if (reconfigure) {
2543 //
2544 // Now write the updated fields back to the lock structure.
2545 //
2546 // Make certain that "polls" is written before "mask" !!!
2547 //
2548 // If another thread picks up the new value of mask and the old
2549            // polls pointer, it could access memory beyond the end of the
2550 // old polling area.
2551 //
2552 // On x86, we need memory fences.
2553 //
2554 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2555 ticket, lck, num_polls));
2556
2557 lck->lk.old_polls = old_polls; // non-volatile store
2558 lck->lk.polls = polls; // volatile store
2559
2560 KMP_MB();
2561
2562 lck->lk.num_polls = num_polls; // non-volatile store
2563 lck->lk.mask = mask; // volatile store
2564
2565 KMP_MB();
2566
2567 //
2568 // Only after the new polling area and mask have been flushed
2569 // to main memory can we update the cleanup ticket field.
2570 //
2571 // volatile load / non-volatile store
2572 //
2573 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2574 }
2575 }
2576}
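//
// Illustrative sketch (disabled): the invariant that the reconfiguration
// code above maintains.  num_polls is kept a power of two with
// mask == num_polls - 1, so (ticket & mask) indexes the polling area like
// a ring buffer, and growing the area gives each waiting ticket its own
// slot to spin on.
//
#if 0
static void __kmp_example_drdpa_mask_growth( void )
{
    kmp_uint64 mask        = 0;   // matches num_polls == 1 (single slot)
    kmp_uint32 num_polls   = 1;
    kmp_uint64 num_waiting = 5;   // e.g. five tickets behind the holder

    while ( num_polls <= num_waiting ) {
        mask = ( mask << 1 ) | 1; // 1, 3, 7, ...
        num_polls *= 2;           // 2, 4, 8, ...
    }
    // Here num_polls == 8 and mask == 7, so tickets map to distinct slots
    // via (ticket & mask) until the next reconfiguration.
    (void) mask; (void) num_polls;
}
#endif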
2577
2578void
2579__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2580{
2581 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2582}
2583
2584static void
2585__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2586{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002587 char const * const func = "omp_set_lock";
2588 if ( lck->lk.initialized != lck ) {
2589 KMP_FATAL( LockIsUninitialized, func );
2590 }
2591 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2592 KMP_FATAL( LockNestableUsedAsSimple, func );
2593 }
2594 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2595 KMP_FATAL( LockIsAlreadyOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002596 }
2597
2598 __kmp_acquire_drdpa_lock( lck, gtid );
2599
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002600 lck->lk.owner_id = gtid + 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002601}
2602
2603int
2604__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2605{
2606 //
2607 // First get a ticket, then read the polls pointer and the mask.
2608 // The polls pointer must be read before the mask!!! (See above)
2609 //
2610 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2611 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2612 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2613 TCR_PTR(lck->lk.polls); // volatile load
2614 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2615 if (TCR_8(polls[ticket & mask].poll) == ticket) {
2616 kmp_uint64 next_ticket = ticket + 1;
2617 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2618 ticket, next_ticket)) {
2619 KMP_FSYNC_ACQUIRED(lck);
2620 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2621 ticket, lck));
2622 lck->lk.now_serving = ticket; // non-volatile store
2623
2624 //
Alp Toker8f2d3f02014-02-24 10:40:15 +00002625 // Since no threads are waiting, there is no possibility that
Jim Cownie5e8470a2013-09-27 10:38:44 +00002626 // we would want to reconfigure the polling area. We might
2627 // have the cleanup ticket value (which says that it is now
2628 // safe to deallocate old_polls), but we'll let a later thread
2629 // which calls __kmp_acquire_lock do that - this routine
2630 // isn't supposed to block, and we would risk blocks if we
2631 // called __kmp_free() to do the deallocation.
2632 //
2633 return TRUE;
2634 }
2635 }
2636 return FALSE;
2637}
2638
2639static int
2640__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2641{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002642 char const * const func = "omp_test_lock";
2643 if ( lck->lk.initialized != lck ) {
2644 KMP_FATAL( LockIsUninitialized, func );
2645 }
2646 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2647 KMP_FATAL( LockNestableUsedAsSimple, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002648 }
2649
2650 int retval = __kmp_test_drdpa_lock( lck, gtid );
2651
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002652 if ( retval ) {
Jim Cownie5e8470a2013-09-27 10:38:44 +00002653 lck->lk.owner_id = gtid + 1;
2654 }
2655 return retval;
2656}
2657
2658void
2659__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2660{
2661 //
2662 // Read the ticket value from the lock data struct, then the polls
2663 // pointer and the mask. The polls pointer must be read before the
2664 // mask!!! (See above)
2665 //
2666 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2667 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2668 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2669 TCR_PTR(lck->lk.polls); // volatile load
2670 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2671 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2672 ticket - 1, lck));
2673 KMP_FSYNC_RELEASING(lck);
2674 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2675}
2676
2677static void
2678__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002680 char const * const func = "omp_unset_lock";
2681 KMP_MB(); /* in case another processor initialized lock */
2682 if ( lck->lk.initialized != lck ) {
2683 KMP_FATAL( LockIsUninitialized, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002684 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002685 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2686 KMP_FATAL( LockNestableUsedAsSimple, func );
2687 }
2688 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2689 KMP_FATAL( LockUnsettingFree, func );
2690 }
2691 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2692 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2693 KMP_FATAL( LockUnsettingSetByAnother, func );
2694 }
2695 lck->lk.owner_id = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002696 __kmp_release_drdpa_lock( lck, gtid );
2697}
2698
2699void
2700__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2701{
2702 lck->lk.location = NULL;
2703 lck->lk.mask = 0;
2704 lck->lk.num_polls = 1;
2705 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2706 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2707 lck->lk.cleanup_ticket = 0;
2708 lck->lk.old_polls = NULL;
2709 lck->lk.next_ticket = 0;
2710 lck->lk.now_serving = 0;
2711 lck->lk.owner_id = 0; // no thread owns the lock.
2712 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2713 lck->lk.initialized = lck;
2714
2715 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2716}
2717
2718static void
2719__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2720{
2721 __kmp_init_drdpa_lock( lck );
2722}
2723
2724void
2725__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2726{
2727 lck->lk.initialized = NULL;
2728 lck->lk.location = NULL;
2729 if (lck->lk.polls != NULL) {
2730 __kmp_free((void *)lck->lk.polls);
2731 lck->lk.polls = NULL;
2732 }
2733 if (lck->lk.old_polls != NULL) {
2734 __kmp_free((void *)lck->lk.old_polls);
2735 lck->lk.old_polls = NULL;
2736 }
2737 lck->lk.mask = 0;
2738 lck->lk.num_polls = 0;
2739 lck->lk.cleanup_ticket = 0;
2740 lck->lk.next_ticket = 0;
2741 lck->lk.now_serving = 0;
2742 lck->lk.owner_id = 0;
2743 lck->lk.depth_locked = -1;
2744}
2745
2746static void
2747__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2748{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002749 char const * const func = "omp_destroy_lock";
2750 if ( lck->lk.initialized != lck ) {
2751 KMP_FATAL( LockIsUninitialized, func );
2752 }
2753 if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2754 KMP_FATAL( LockNestableUsedAsSimple, func );
2755 }
2756 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2757 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002758 }
2759 __kmp_destroy_drdpa_lock( lck );
2760}
2761
2762
2763//
2764// nested drdpa ticket locks
2765//
2766
2767void
2768__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2769{
2770 KMP_DEBUG_ASSERT( gtid >= 0 );
2771
2772 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2773 lck->lk.depth_locked += 1;
2774 }
2775 else {
2776 __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2777 KMP_MB();
2778 lck->lk.depth_locked = 1;
2779 KMP_MB();
2780 lck->lk.owner_id = gtid + 1;
2781 }
2782}
2783
2784static void
2785__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2786{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002787 char const * const func = "omp_set_nest_lock";
2788 if ( lck->lk.initialized != lck ) {
2789 KMP_FATAL( LockIsUninitialized, func );
2790 }
2791 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2792 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002793 }
2794 __kmp_acquire_nested_drdpa_lock( lck, gtid );
2795}
2796
2797int
2798__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2799{
2800 int retval;
2801
2802 KMP_DEBUG_ASSERT( gtid >= 0 );
2803
2804 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2805 retval = ++lck->lk.depth_locked;
2806 }
2807 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2808 retval = 0;
2809 }
2810 else {
2811 KMP_MB();
2812 retval = lck->lk.depth_locked = 1;
2813 KMP_MB();
2814 lck->lk.owner_id = gtid + 1;
2815 }
2816 return retval;
2817}
2818
2819static int
2820__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2821{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002822 char const * const func = "omp_test_nest_lock";
2823 if ( lck->lk.initialized != lck ) {
2824 KMP_FATAL( LockIsUninitialized, func );
2825 }
2826 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2827 KMP_FATAL( LockSimpleUsedAsNestable, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002828 }
2829 return __kmp_test_nested_drdpa_lock( lck, gtid );
2830}
2831
2832void
2833__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2834{
2835 KMP_DEBUG_ASSERT( gtid >= 0 );
2836
2837 KMP_MB();
2838 if ( --(lck->lk.depth_locked) == 0 ) {
2839 KMP_MB();
2840 lck->lk.owner_id = 0;
2841 __kmp_release_drdpa_lock( lck, gtid );
2842 }
2843}
2844
2845static void
2846__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2847{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002848 char const * const func = "omp_unset_nest_lock";
2849 KMP_MB(); /* in case another processor initialized lock */
2850 if ( lck->lk.initialized != lck ) {
2851 KMP_FATAL( LockIsUninitialized, func );
2852 }
2853 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2854 KMP_FATAL( LockSimpleUsedAsNestable, func );
2855 }
2856 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2857 KMP_FATAL( LockUnsettingFree, func );
2858 }
2859 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2860 KMP_FATAL( LockUnsettingSetByAnother, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002861 }
2862 __kmp_release_nested_drdpa_lock( lck, gtid );
2863}
2864
2865void
2866__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2867{
2868 __kmp_init_drdpa_lock( lck );
2869 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2870}
2871
2872static void
2873__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2874{
2875 __kmp_init_nested_drdpa_lock( lck );
2876}
2877
2878void
2879__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2880{
2881 __kmp_destroy_drdpa_lock( lck );
2882 lck->lk.depth_locked = 0;
2883}
2884
2885static void
2886__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2887{
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002888 char const * const func = "omp_destroy_nest_lock";
2889 if ( lck->lk.initialized != lck ) {
2890 KMP_FATAL( LockIsUninitialized, func );
2891 }
2892 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2893 KMP_FATAL( LockSimpleUsedAsNestable, func );
2894 }
2895 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2896 KMP_FATAL( LockStillOwned, func );
Jim Cownie5e8470a2013-09-27 10:38:44 +00002897 }
2898 __kmp_destroy_nested_drdpa_lock( lck );
2899}
2900
2901
2902//
2903// access functions to fields which don't exist for all lock kinds.
2904//
2905
2906static int
2907__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2908{
2909 return lck == lck->lk.initialized;
2910}
2911
2912static const ident_t *
2913__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2914{
2915 return lck->lk.location;
2916}
2917
2918static void
2919__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2920{
2921 lck->lk.location = loc;
2922}
2923
2924static kmp_lock_flags_t
2925__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2926{
2927 return lck->lk.flags;
2928}
2929
2930static void
2931__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2932{
2933 lck->lk.flags = flags;
2934}
2935
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00002936#if KMP_USE_DYNAMIC_LOCK
2937
2938// Definitions of lock hints.
2939# ifndef __OMP_H
2940typedef enum kmp_lock_hint_t {
2941 kmp_lock_hint_none = 0,
2942 kmp_lock_hint_contended,
2943 kmp_lock_hint_uncontended,
2944 kmp_lock_hint_nonspeculative,
2945 kmp_lock_hint_speculative,
2946 kmp_lock_hint_adaptive,
2947} kmp_lock_hint_t;
2948# endif
2949
2950// Direct lock initializers. Each initializer simply writes its tag to the low 8 bits of the lock word.
2951#define expand_init_lock(l, a) \
2952static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
2953 *lck = DYNA_LOCK_FREE(l); \
2954 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
2955}
2956FOREACH_D_LOCK(expand_init_lock, 0)
2957#undef expand_init_lock
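//
// For illustration only (disabled): the expansion the macro above generates
// for one direct lock kind.  With l == tas the preprocessor produces, in
// effect:
//
#if 0
static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
    *lck = DYNA_LOCK_FREE(tas);                                   // store the tas tag
    KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));  // same trace as the macro
}
#endif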
2958
2959#if DYNA_HAS_HLE
2960
2961// HLE lock functions - imported from the testbed runtime.
2962#if KMP_MIC
2963# define machine_pause() _mm_delay_32(10) // TODO: find the right argument
2964#else
2965# define machine_pause() _mm_pause()
2966#endif
2967#define HLE_ACQUIRE ".byte 0xf2;"
2968#define HLE_RELEASE ".byte 0xf3;"
2969
2970static inline kmp_uint32
2971swap4(kmp_uint32 volatile *p, kmp_uint32 v)
2972{
2973 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
2974 : "+r"(v), "+m"(*p)
2975 :
2976 : "memory");
2977 return v;
2978}
2979
2980static void
2981__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
2982{
2983 *lck = 0;
2984}
2985
2986static void
2987__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
2988{
2989 // Use gtid for DYNA_LOCK_BUSY if necessary
2990 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
2991 int delay = 1;
2992 do {
2993 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
2994 for (int i = delay; i != 0; --i)
2995 machine_pause();
2996 delay = ((delay << 1) | 1) & 7;
2997 }
2998 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
2999 }
3000}
3001
3002static void
3003__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3004{
3005 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3006}
3007
3008static void
3009__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3010{
3011 __asm__ volatile(HLE_RELEASE "movl %1,%0"
3012 : "=m"(*lck)
3013 : "r"(DYNA_LOCK_FREE(hle))
3014 : "memory");
3015}
3016
3017static void
3018__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3019{
3020 __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3021}
3022
3023static int
3024__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3025{
3026 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3027}
3028
3029static int
3030__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3031{
3032 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3033}
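//
// Side note (disabled sketch, an assumption about compiler support rather
// than anything this runtime relies on): toolchains that implement the
// x86 HLE memory-model flags for the __atomic builtins can express the
// same elided exchange/store without hand-emitting the 0xf2/0xf3 prefixes.
//
#if 0
static inline kmp_uint32
__kmp_example_swap4_hle(kmp_uint32 volatile *p, kmp_uint32 v)
{
    // XACQUIRE-prefixed exchange, equivalent in intent to swap4() above.
    return __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}

static inline void
__kmp_example_release_hle(kmp_uint32 volatile *p, kmp_uint32 v)
{
    // XRELEASE-prefixed store, equivalent in intent to the release above.
    __atomic_store_n(p, v, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif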
3034
3035#endif // DYNA_HAS_HLE
3036
3037// Entry functions for indirect locks (first element of direct_*_ops[]).
3038static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3039static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3040static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3041static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3042static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3043static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3044static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3045static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3046
3047//
3048// Jump tables for the direct lock functions.
3049// Only fill in the odd entries; that avoids the need to shift out the low bit.
3050//
3051#define expand_func0(l, op) 0,op##_##l##_##lock,
3052void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3053 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3054
3055#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3056void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3057 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3058
3059// Differentiates *lock and *lock_with_checks.
3060#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3061#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3062static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3063 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3064 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3065static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3066 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3067 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3068
3069#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3070#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3071static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3072 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3073 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3074
3075// Exposes only one set of jump tables (*lock or *lock_with_checks).
3076void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3077void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3078int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3079
3080//
3081// Jump tables for the indirect lock functions.
3082//
3083#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3084void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3085 = { FOREACH_I_LOCK(expand_func4, init) };
3086void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3087 = { FOREACH_I_LOCK(expand_func4, destroy) };
3088
3089// Differentiates *lock and *lock_with_checks.
3090#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3091#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3092static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3093 = { { FOREACH_I_LOCK(expand_func5, acquire) },
3094 { FOREACH_I_LOCK(expand_func5c, acquire) } };
3095static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3096 = { { FOREACH_I_LOCK(expand_func5, release) },
3097 { FOREACH_I_LOCK(expand_func5c, release) } };
3098
3099#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3100#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3101static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3102 = { { FOREACH_I_LOCK(expand_func6, test) },
3103 { FOREACH_I_LOCK(expand_func6c, test) } };
3104
3105// Exposes only one set of jump tables (*lock or *lock_with_checks).
3106void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3107void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3108int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3109
3110// Lock index table.
3111kmp_indirect_lock_t **__kmp_indirect_lock_table;
3112kmp_lock_index_t __kmp_indirect_lock_table_size;
3113kmp_lock_index_t __kmp_indirect_lock_table_next;
3114
3115// Size of indirect locks.
3116static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3117 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3118#if KMP_USE_ADAPTIVE_LOCKS
3119 sizeof(kmp_adaptive_lock_t),
3120#endif
3121 sizeof(kmp_drdpa_lock_t),
3122 sizeof(kmp_tas_lock_t),
3123#if DYNA_HAS_FUTEX
3124 sizeof(kmp_futex_lock_t),
3125#endif
3126 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3127 sizeof(kmp_drdpa_lock_t)
3128};
3129
3130// Jump tables for lock accessor/modifier.
3131void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3132void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3133const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3134kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3135
3136// Use different lock pools for different lock types.
3137static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3138
3139// Inserts the given lock ptr into the lock table.
3140kmp_lock_index_t
3141__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3142{
3143 kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3144 // Check capacity and double the size if required
3145 if (next >= __kmp_indirect_lock_table_size) {
3146 kmp_lock_index_t i;
3147 kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3148 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3149 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3150 memcpy(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
3151 __kmp_free(old_table);
3152 __kmp_indirect_lock_table_size = 2*next;
3153 }
3154 // Insert lck to the table and return the index.
3155 __kmp_indirect_lock_table[next] = lck;
3156 __kmp_indirect_lock_table_next++;
3157 return next;
3158}
3159
3160// User lock allocator for dynamically dispatched locks.
3161kmp_indirect_lock_t *
3162__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3163{
3164 kmp_indirect_lock_t *lck;
3165 kmp_lock_index_t idx;
3166
3167 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3168
3169 if (__kmp_indirect_lock_pool[tag] != NULL) {
3170 lck = __kmp_indirect_lock_pool[tag];
3171 if (OMP_LOCK_T_SIZE < sizeof(void *))
3172 idx = lck->lock->pool.index;
3173 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3174 } else {
3175 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3176 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3177 if (OMP_LOCK_T_SIZE < sizeof(void *))
3178 idx = __kmp_insert_indirect_lock(lck);
3179 }
3180
3181 __kmp_release_lock(&__kmp_global_lock, gtid);
3182
3183 lck->type = tag;
3184
3185 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3186 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3187 } else {
3188 *((kmp_indirect_lock_t **)user_lock) = lck;
3189 }
3190
3191 return lck;
3192}
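//
// Illustrative sketch (disabled): how the low bit of a 32-bit user lock
// word separates the two cases set up above and in the direct-lock
// initializers.  Direct locks keep their (odd) tag in the word itself,
// while indirect locks store (table index << 1), which is always even.
// The helper name is hypothetical; nothing else in the runtime calls it.
//
#if 0
static int __kmp_example_lock_word_is_indirect( kmp_uint32 lock_word )
{
    // Even => indirect: (lock_word >> 1) indexes __kmp_indirect_lock_table.
    // Odd  => direct:   the tag lives in the low bits of the word itself.
    return ( lock_word & 1 ) == 0;
}
#endif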
3193
3194// User lock lookup for dynamically dispatched locks.
3195static __forceinline
3196kmp_indirect_lock_t *
3197__kmp_lookup_indirect_lock(void **user_lock, const char *func)
3198{
3199 if (__kmp_env_consistency_check) {
3200 kmp_indirect_lock_t *lck = NULL;
3201 if (user_lock == NULL) {
3202 KMP_FATAL(LockIsUninitialized, func);
3203 }
3204 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3205 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3206 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3207 KMP_FATAL(LockIsUninitialized, func);
3208 }
3209 lck = __kmp_indirect_lock_table[idx];
3210 } else {
3211 lck = *((kmp_indirect_lock_t **)user_lock);
3212 }
3213 if (lck == NULL) {
3214 KMP_FATAL(LockIsUninitialized, func);
3215 }
3216 return lck;
3217 } else {
3218 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3219 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3220 } else {
3221 return *((kmp_indirect_lock_t **)user_lock);
3222 }
3223 }
3224}
3225
3226static void
3227__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3228{
3229#if KMP_USE_ADAPTIVE_LOCKS
3230 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3231 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3232 seq = lockseq_queuing;
3233 }
3234#endif
3235 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3236 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3237 DYNA_I_LOCK_FUNC(l, init)(l->lock);
3238 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3239}
3240
3241static void
3242__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3243{
3244 kmp_uint32 gtid = __kmp_entry_gtid();
3245 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3246 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3247 kmp_indirect_locktag_t tag = l->type;
3248
3249 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3250
3251 // Use the base lock's space to keep the pool chain.
3252 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3253 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3254 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3255 }
3256 __kmp_indirect_lock_pool[tag] = l;
3257
3258 __kmp_release_lock(&__kmp_global_lock, gtid);
3259}
3260
3261static void
3262__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3263{
3264 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3265 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3266}
3267
3268static void
3269__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3270{
3271 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3272 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3273}
3274
3275static int
3276__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3277{
3278 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3279 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3280}
3281
3282static void
3283__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3284{
3285 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3286 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3287}
3288
3289static void
3290__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3291{
3292 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3293 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3294}
3295
3296static int
3297__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3298{
3299 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3300 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3301}
3302
3303kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3304
3305// Initialize a hinted lock.
3306void
3307__kmp_init_lock_hinted(void **lock, int hint)
3308{
3309 kmp_dyna_lockseq_t seq;
3310 switch (hint) {
3311 case kmp_lock_hint_uncontended:
3312 seq = lockseq_tas;
3313 break;
3314 case kmp_lock_hint_speculative:
3315#if DYNA_HAS_HLE
3316 seq = lockseq_hle;
3317#else
3318 seq = lockseq_tas;
3319#endif
3320 break;
3321 case kmp_lock_hint_adaptive:
3322#if KMP_USE_ADAPTIVE_LOCKS
3323 seq = lockseq_adaptive;
3324#else
3325 seq = lockseq_queuing;
3326#endif
3327 break;
3328 // Defaults to queuing locks.
3329 case kmp_lock_hint_contended:
3330 case kmp_lock_hint_nonspeculative:
3331 default:
3332 seq = lockseq_queuing;
3333 break;
3334 }
3335 if (DYNA_IS_D_LOCK(seq)) {
3336 DYNA_INIT_D_LOCK(lock, seq);
3337#if USE_ITT_BUILD
3338 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3339#endif
3340 } else {
3341 DYNA_INIT_I_LOCK(lock, seq);
3342#if USE_ITT_BUILD
3343 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3344 __kmp_itt_lock_creating(ilk->lock, NULL);
3345#endif
3346 }
3347}
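
// Usage sketch (illustrative only; the lock-API entry points elsewhere in the
// runtime normally drive this). The storage and hint value here are assumptions:
//
//   void *lck;                                        // user-visible lock storage
//   __kmp_init_lock_hinted(&lck, kmp_lock_hint_speculative);
//   // Subsequent set/unset/test calls then dispatch through the jump tables
//   // (__kmp_direct_set_ops et al. for direct locks, or the indirect wrappers
//   // above for indirect locks), depending on the sequence chosen from the hint.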
3348
3349// This is used only in kmp_error.c when consistency checking is on.
3350kmp_int32
3351__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3352{
3353 switch (seq) {
3354 case lockseq_tas:
3355 case lockseq_nested_tas:
3356 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3357#if DYNA_HAS_FUTEX
3358 case lockseq_futex:
3359 case lockseq_nested_futex:
3360 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3361#endif
3362 case lockseq_ticket:
3363 case lockseq_nested_ticket:
3364 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3365 case lockseq_queuing:
3366 case lockseq_nested_queuing:
3367#if KMP_USE_ADAPTIVE_LOCKS
3368 case lockseq_adaptive:
3369 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3370#endif
3371 case lockseq_drdpa:
3372 case lockseq_nested_drdpa:
3373 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3374 default:
3375 return 0;
3376 }
3377}
3378
3379 // Initialize a hinted nested lock. The value initialized from KMP_LOCK_KIND needs
3380 // to be translated to its nested version.
3381void
3382__kmp_init_nest_lock_hinted(void **lock, int hint)
3383{
3384 kmp_dyna_lockseq_t seq;
3385 switch (hint) {
3386 case kmp_lock_hint_uncontended:
3387 seq = lockseq_nested_tas;
3388 break;
3389 // Defaults to queuing locks.
3390 case kmp_lock_hint_contended:
3391 case kmp_lock_hint_nonspeculative:
3392 default:
3393 seq = lockseq_nested_queuing;
3394 break;
3395 }
3396 DYNA_INIT_I_LOCK(lock, seq);
3397#if USE_ITT_BUILD
3398 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3399 __kmp_itt_lock_creating(ilk->lock, NULL);
3400#endif
3401}
3402
3403// Initializes the lock table for indirect locks.
3404static void
3405__kmp_init_indirect_lock_table()
3406{
3407 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3408 __kmp_indirect_lock_table_size = 1024;
3409 __kmp_indirect_lock_table_next = 0;
3410}
3411
3412#if KMP_USE_ADAPTIVE_LOCKS
3413# define init_lock_func(table, expand) { \
3414 table[locktag_ticket] = expand(ticket); \
3415 table[locktag_queuing] = expand(queuing); \
3416 table[locktag_adaptive] = expand(queuing); \
3417 table[locktag_drdpa] = expand(drdpa); \
3418 table[locktag_nested_ticket] = expand(ticket); \
3419 table[locktag_nested_queuing] = expand(queuing); \
3420 table[locktag_nested_drdpa] = expand(drdpa); \
3421}
3422#else
3423# define init_lock_func(table, expand) { \
3424 table[locktag_ticket] = expand(ticket); \
3425 table[locktag_queuing] = expand(queuing); \
3426 table[locktag_drdpa] = expand(drdpa); \
3427 table[locktag_nested_ticket] = expand(ticket); \
3428 table[locktag_nested_queuing] = expand(queuing); \
3429 table[locktag_nested_drdpa] = expand(drdpa); \
3430}
3431#endif // KMP_USE_ADAPTIVE_LOCKS
3432
3433// Initializes data for dynamic user locks.
3434void
3435__kmp_init_dynamic_user_locks()
3436{
3437     // Initialize the jump table pointers
3438 int offset = (__kmp_env_consistency_check)? 1: 0;
3439 __kmp_direct_set_ops = direct_set_tab[offset];
3440 __kmp_direct_unset_ops = direct_unset_tab[offset];
3441 __kmp_direct_test_ops = direct_test_tab[offset];
3442 __kmp_indirect_set_ops = indirect_set_tab[offset];
3443 __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3444 __kmp_indirect_test_ops = indirect_test_tab[offset];
3445 __kmp_init_indirect_lock_table();
3446
3447     // Initialize the lock accessor/modifier function tables
3448 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3449#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3450 init_lock_func(__kmp_indirect_set_location, expand_func);
3451#undef expand_func
3452#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3453 init_lock_func(__kmp_indirect_set_flags, expand_func);
3454#undef expand_func
3455#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3456 init_lock_func(__kmp_indirect_get_location, expand_func);
3457#undef expand_func
3458#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3459 init_lock_func(__kmp_indirect_get_flags, expand_func);
3460#undef expand_func
3461
3462 __kmp_init_user_locks = TRUE;
3463}
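
// Illustrative preprocessor expansion (not additional code): for the location
// setter table, the ticket entry produced by init_lock_func/expand_func above is
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//
// Each indirect lock tag is routed to the accessor/modifier of its underlying
// implementation; with KMP_USE_ADAPTIVE_LOCKS, the adaptive tag reuses the
// queuing-lock accessors.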
3464
3465// Clean up the lock table.
3466void
3467__kmp_cleanup_indirect_user_locks()
3468{
3469 kmp_lock_index_t i;
3470 int k;
3471
3472 // Clean up locks in the pools first (they were already destroyed before going into the pools).
3473 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3474 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3475 while (l != NULL) {
3476 kmp_indirect_lock_t *ll = l;
3477 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3478 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3479 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3480 }
3481 __kmp_free(ll->lock);
3482 __kmp_free(ll);
3483 }
3484 }
3485 // Clean up the remaining undestroyed locks.
3486 for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3487 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3488 if (l != NULL) {
3489 // Locks not destroyed explicitly need to be destroyed here.
3490 DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3491 __kmp_free(l->lock);
3492 __kmp_free(l);
3493 }
3494 }
3495 // Free the table
3496 __kmp_free(__kmp_indirect_lock_table);
3497
3498 __kmp_init_user_locks = FALSE;
3499}
3500
3501enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3502int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3503
3504#else // KMP_USE_DYNAMIC_LOCK
3505
Jim Cownie5e8470a2013-09-27 10:38:44 +00003506/* ------------------------------------------------------------------------ */
3507/* user locks
3508 *
3509 * They are implemented as a table of function pointers which are set to the
3510 * lock functions of the appropriate kind, once that has been determined.
3511 */
3512
3513enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3514
3515size_t __kmp_base_user_lock_size = 0;
3516size_t __kmp_user_lock_size = 0;
3517
3518kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3519void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3520
3521int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3522void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3523void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3524void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3525void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3526void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3527
3528int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3529void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3530void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3531void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3532
3533int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3534const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3535void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3536kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3537void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3538
3539void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3540{
3541 switch ( user_lock_kind ) {
3542 case lk_default:
3543 default:
3544 KMP_ASSERT( 0 );
3545
3546 case lk_tas: {
3547 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3548 __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3549
3550 __kmp_get_user_lock_owner_ =
3551 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3552 ( &__kmp_get_tas_lock_owner );
3553
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003554 if ( __kmp_env_consistency_check ) {
3555 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3556 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3557 }
3558 else {
3559 KMP_BIND_USER_LOCK(tas);
3560 KMP_BIND_NESTED_USER_LOCK(tas);
3561 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003562
3563 __kmp_destroy_user_lock_ =
3564 ( void ( * )( kmp_user_lock_p ) )
3565 ( &__kmp_destroy_tas_lock );
3566
Jim Cownie5e8470a2013-09-27 10:38:44 +00003567 __kmp_is_user_lock_initialized_ =
3568 ( int ( * )( kmp_user_lock_p ) ) NULL;
3569
3570 __kmp_get_user_lock_location_ =
3571 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3572
3573 __kmp_set_user_lock_location_ =
3574 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3575
3576 __kmp_get_user_lock_flags_ =
3577 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3578
3579 __kmp_set_user_lock_flags_ =
3580 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3581 }
3582 break;
3583
Jim Cownie181b4bb2013-12-23 17:28:57 +00003584#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003585
3586 case lk_futex: {
3587 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3588 __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3589
3590 __kmp_get_user_lock_owner_ =
3591 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3592 ( &__kmp_get_futex_lock_owner );
3593
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003594 if ( __kmp_env_consistency_check ) {
3595 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3596 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3597 }
3598 else {
3599 KMP_BIND_USER_LOCK(futex);
3600 KMP_BIND_NESTED_USER_LOCK(futex);
3601 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003602
3603 __kmp_destroy_user_lock_ =
3604 ( void ( * )( kmp_user_lock_p ) )
3605 ( &__kmp_destroy_futex_lock );
3606
Jim Cownie5e8470a2013-09-27 10:38:44 +00003607 __kmp_is_user_lock_initialized_ =
3608 ( int ( * )( kmp_user_lock_p ) ) NULL;
3609
3610 __kmp_get_user_lock_location_ =
3611 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3612
3613 __kmp_set_user_lock_location_ =
3614 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3615
3616 __kmp_get_user_lock_flags_ =
3617 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3618
3619 __kmp_set_user_lock_flags_ =
3620 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3621 }
3622 break;
3623
Jim Cownie181b4bb2013-12-23 17:28:57 +00003624#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
Jim Cownie5e8470a2013-09-27 10:38:44 +00003625
3626 case lk_ticket: {
3627 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3628 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3629
3630 __kmp_get_user_lock_owner_ =
3631 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3632 ( &__kmp_get_ticket_lock_owner );
3633
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003634 if ( __kmp_env_consistency_check ) {
3635 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3636 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3637 }
3638 else {
3639 KMP_BIND_USER_LOCK(ticket);
3640 KMP_BIND_NESTED_USER_LOCK(ticket);
3641 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003642
3643 __kmp_destroy_user_lock_ =
3644 ( void ( * )( kmp_user_lock_p ) )
3645 ( &__kmp_destroy_ticket_lock );
3646
Jim Cownie5e8470a2013-09-27 10:38:44 +00003647 __kmp_is_user_lock_initialized_ =
3648 ( int ( * )( kmp_user_lock_p ) )
3649 ( &__kmp_is_ticket_lock_initialized );
3650
3651 __kmp_get_user_lock_location_ =
3652 ( const ident_t * ( * )( kmp_user_lock_p ) )
3653 ( &__kmp_get_ticket_lock_location );
3654
3655 __kmp_set_user_lock_location_ =
3656 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3657 ( &__kmp_set_ticket_lock_location );
3658
3659 __kmp_get_user_lock_flags_ =
3660 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3661 ( &__kmp_get_ticket_lock_flags );
3662
3663 __kmp_set_user_lock_flags_ =
3664 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3665 ( &__kmp_set_ticket_lock_flags );
3666 }
3667 break;
3668
3669 case lk_queuing: {
3670 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3671 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3672
3673 __kmp_get_user_lock_owner_ =
3674 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3675 ( &__kmp_get_queuing_lock_owner );
3676
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003677 if ( __kmp_env_consistency_check ) {
3678 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3679 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3680 }
3681 else {
3682 KMP_BIND_USER_LOCK(queuing);
3683 KMP_BIND_NESTED_USER_LOCK(queuing);
3684 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003685
3686 __kmp_destroy_user_lock_ =
3687 ( void ( * )( kmp_user_lock_p ) )
3688 ( &__kmp_destroy_queuing_lock );
3689
Jim Cownie5e8470a2013-09-27 10:38:44 +00003690 __kmp_is_user_lock_initialized_ =
3691 ( int ( * )( kmp_user_lock_p ) )
3692 ( &__kmp_is_queuing_lock_initialized );
3693
3694 __kmp_get_user_lock_location_ =
3695 ( const ident_t * ( * )( kmp_user_lock_p ) )
3696 ( &__kmp_get_queuing_lock_location );
3697
3698 __kmp_set_user_lock_location_ =
3699 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3700 ( &__kmp_set_queuing_lock_location );
3701
3702 __kmp_get_user_lock_flags_ =
3703 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3704 ( &__kmp_get_queuing_lock_flags );
3705
3706 __kmp_set_user_lock_flags_ =
3707 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3708 ( &__kmp_set_queuing_lock_flags );
3709 }
3710 break;
3711
3712#if KMP_USE_ADAPTIVE_LOCKS
3713 case lk_adaptive: {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003714 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3715 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
Jim Cownie5e8470a2013-09-27 10:38:44 +00003716
3717 __kmp_get_user_lock_owner_ =
3718 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3719 ( &__kmp_get_queuing_lock_owner );
3720
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003721 if ( __kmp_env_consistency_check ) {
3722 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3723 }
3724 else {
3725 KMP_BIND_USER_LOCK(adaptive);
3726 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003727
3728 __kmp_destroy_user_lock_ =
3729 ( void ( * )( kmp_user_lock_p ) )
3730 ( &__kmp_destroy_adaptive_lock );
3731
3732 __kmp_is_user_lock_initialized_ =
3733 ( int ( * )( kmp_user_lock_p ) )
3734 ( &__kmp_is_queuing_lock_initialized );
3735
3736 __kmp_get_user_lock_location_ =
3737 ( const ident_t * ( * )( kmp_user_lock_p ) )
3738 ( &__kmp_get_queuing_lock_location );
3739
3740 __kmp_set_user_lock_location_ =
3741 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3742 ( &__kmp_set_queuing_lock_location );
3743
3744 __kmp_get_user_lock_flags_ =
3745 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3746 ( &__kmp_get_queuing_lock_flags );
3747
3748 __kmp_set_user_lock_flags_ =
3749 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3750 ( &__kmp_set_queuing_lock_flags );
3751
3752 }
3753 break;
3754#endif // KMP_USE_ADAPTIVE_LOCKS
3755
3756 case lk_drdpa: {
3757 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3758 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3759
3760 __kmp_get_user_lock_owner_ =
3761 ( kmp_int32 ( * )( kmp_user_lock_p ) )
3762 ( &__kmp_get_drdpa_lock_owner );
3763
Jim Cownie4cc4bb42014-10-07 16:25:50 +00003764 if ( __kmp_env_consistency_check ) {
3765 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3766 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3767 }
3768 else {
3769 KMP_BIND_USER_LOCK(drdpa);
3770 KMP_BIND_NESTED_USER_LOCK(drdpa);
3771 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00003772
3773 __kmp_destroy_user_lock_ =
3774 ( void ( * )( kmp_user_lock_p ) )
3775 ( &__kmp_destroy_drdpa_lock );
3776
Jim Cownie5e8470a2013-09-27 10:38:44 +00003777 __kmp_is_user_lock_initialized_ =
3778 ( int ( * )( kmp_user_lock_p ) )
3779 ( &__kmp_is_drdpa_lock_initialized );
3780
3781 __kmp_get_user_lock_location_ =
3782 ( const ident_t * ( * )( kmp_user_lock_p ) )
3783 ( &__kmp_get_drdpa_lock_location );
3784
3785 __kmp_set_user_lock_location_ =
3786 ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3787 ( &__kmp_set_drdpa_lock_location );
3788
3789 __kmp_get_user_lock_flags_ =
3790 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3791 ( &__kmp_get_drdpa_lock_flags );
3792
3793 __kmp_set_user_lock_flags_ =
3794 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3795 ( &__kmp_set_drdpa_lock_flags );
3796 }
3797 break;
3798 }
3799}
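
// Dispatch sketch (illustrative; lck stands for an already initialized
// kmp_user_lock_p and is an assumption here): once a lock kind has been chosen,
// every user-lock operation goes through the function pointers bound above.
//
//   __kmp_set_user_lock_vptrs( lk_ticket );                   // bind ticket routines
//   kmp_int32 owner = ( *__kmp_get_user_lock_owner_ )( lck ); // indirect dispatch
//
// For the tas and futex kinds several of these pointers stay NULL (no location,
// flags, or "initialized" queries), so callers are expected to check for NULL first.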
3800
3801
3802// ----------------------------------------------------------------------------
3803// User lock table & lock allocation
3804
3805kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3806kmp_user_lock_p __kmp_lock_pool = NULL;
3807
3808// Lock block-allocation support.
3809kmp_block_of_locks* __kmp_lock_blocks = NULL;
3810int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3811
3812static kmp_lock_index_t
3813__kmp_lock_table_insert( kmp_user_lock_p lck )
3814{
3815 // Assume that kmp_global_lock is held upon entry/exit.
3816 kmp_lock_index_t index;
3817 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3818 kmp_lock_index_t size;
3819 kmp_user_lock_p *table;
3820 kmp_lock_index_t i;
3821 // Reallocate lock table.
3822 if ( __kmp_user_lock_table.allocated == 0 ) {
3823 size = 1024;
3824 }
3825 else {
3826 size = __kmp_user_lock_table.allocated * 2;
3827 }
3828 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3829 memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3830 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3831        // We cannot free the previous table now, since it may be in use by other
3832        // threads. So save the pointer to the previous table in the first element of the
3833        // new table. All the tables are organized into a list, and can be freed when the
3834        // library shuts down.
3835 __kmp_user_lock_table.table = table;
3836 __kmp_user_lock_table.allocated = size;
3837 }
3838 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3839 index = __kmp_user_lock_table.used;
3840 __kmp_user_lock_table.table[ index ] = lck;
3841 ++ __kmp_user_lock_table.used;
3842 return index;
3843}
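
// Table growth sketch (illustrative, from the comments above): the old table is
// never freed while threads may still read it; its address is stashed in element
// [0] of the new, doubled table, forming a chain that __kmp_cleanup_user_locks
// later walks and frees. Real entries start at index 1.
//
//   __kmp_user_lock_table.table --> [ prev | lck1 | lck2 | ... ]
//                                      |
//                                      +--> [ prev | ... ] --> ... --> NULL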
3844
3845static kmp_user_lock_p
3846__kmp_lock_block_allocate()
3847{
3848 // Assume that kmp_global_lock is held upon entry/exit.
3849 static int last_index = 0;
3850 if ( ( last_index >= __kmp_num_locks_in_block )
3851 || ( __kmp_lock_blocks == NULL ) ) {
3852 // Restart the index.
3853 last_index = 0;
3854 // Need to allocate a new block.
3855 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3856 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3857 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3858 // Set up the new block.
3859 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3860 new_block->next_block = __kmp_lock_blocks;
3861 new_block->locks = (void *)buffer;
3862 // Publish the new block.
3863 KMP_MB();
3864 __kmp_lock_blocks = new_block;
3865 }
3866 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3867 [ last_index * __kmp_user_lock_size ] ) );
3868 last_index++;
3869 return ret;
3870}
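
// Memory layout produced above (illustrative): one allocation holds
// __kmp_num_locks_in_block lock slots followed by the block descriptor, so no
// separate allocation is needed for the descriptor itself.
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//            ^ new_block->locks                  ^ new_block == &buffer[space_for_locks]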
3871
3872//
3873// Get memory for a lock. It may be freshly allocated memory or reused memory
3874 // from the lock pool.
3875//
3876kmp_user_lock_p
3877__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3878 kmp_lock_flags_t flags )
3879{
3880 kmp_user_lock_p lck;
3881 kmp_lock_index_t index;
3882 KMP_DEBUG_ASSERT( user_lock );
3883
3884 __kmp_acquire_lock( &__kmp_global_lock, gtid );
3885
3886 if ( __kmp_lock_pool == NULL ) {
3887 // Lock pool is empty. Allocate new memory.
3888 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3889 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3890 }
3891 else {
3892 lck = __kmp_lock_block_allocate();
3893 }
3894
3895        // Insert the lock in the table so that it can be freed in __kmp_cleanup,
3896        // and so the debugger has info on all allocated locks.
3897 index = __kmp_lock_table_insert( lck );
3898 }
3899 else {
3900 // Pick up lock from pool.
3901 lck = __kmp_lock_pool;
3902 index = __kmp_lock_pool->pool.index;
3903 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3904 }
3905
3906 //
3907 // We could potentially differentiate between nested and regular locks
3908 // here, and do the lock table lookup for regular locks only.
3909 //
3910 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3911 * ( (kmp_lock_index_t *) user_lock ) = index;
3912 }
3913 else {
3914 * ( (kmp_user_lock_p *) user_lock ) = lck;
3915 }
3916
3917    // Mark the lock if it is a critical section lock.
3918 __kmp_set_user_lock_flags( lck, flags );
3919
3920    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3921
3922 return lck;
3923}
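
// Lifecycle sketch (illustrative; gtid is the caller's global thread id and the
// zero flags value is an assumption). The OpenMP lock entry points normally drive
// this sequence rather than user code:
//
//   void *user_lock;                                        // omp_lock_t-sized storage
//   kmp_user_lock_p lck = __kmp_user_lock_allocate( &user_lock, gtid, 0 );
//   ( *__kmp_init_user_lock_with_checks_ )( lck );          // bound by the vptr setup
//   // ... acquire/release/test through the bound function pointers ...
//   lck = __kmp_lookup_user_lock( &user_lock, "omp_destroy_lock" );
//   ( *__kmp_destroy_user_lock_with_checks_ )( lck );
//   __kmp_user_lock_free( &user_lock, gtid, lck );          // memory goes back to the pool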
3924
3925 // Return the lock's memory to the pool for reuse.
3926void
3927__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3928{
3929 kmp_lock_pool_t * lock_pool;
3930
3931 KMP_DEBUG_ASSERT( user_lock != NULL );
3932 KMP_DEBUG_ASSERT( lck != NULL );
3933
3934 __kmp_acquire_lock( & __kmp_global_lock, gtid );
3935
3936 lck->pool.next = __kmp_lock_pool;
3937 __kmp_lock_pool = lck;
3938 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3939 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3940 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3941 lck->pool.index = index;
3942 }
3943
3944 __kmp_release_lock( & __kmp_global_lock, gtid );
3945}
3946
3947kmp_user_lock_p
3948__kmp_lookup_user_lock( void **user_lock, char const *func )
3949{
3950 kmp_user_lock_p lck = NULL;
3951
3952 if ( __kmp_env_consistency_check ) {
3953 if ( user_lock == NULL ) {
3954 KMP_FATAL( LockIsUninitialized, func );
3955 }
3956 }
3957
3958 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3959 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3960 if ( __kmp_env_consistency_check ) {
3961 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3962 KMP_FATAL( LockIsUninitialized, func );
3963 }
3964 }
3965 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3966 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3967 lck = __kmp_user_lock_table.table[index];
3968 }
3969 else {
3970 lck = *( (kmp_user_lock_p *)user_lock );
3971 }
3972
3973 if ( __kmp_env_consistency_check ) {
3974 if ( lck == NULL ) {
3975 KMP_FATAL( LockIsUninitialized, func );
3976 }
3977 }
3978
3979 return lck;
3980}
3981
3982void
3983__kmp_cleanup_user_locks( void )
3984{
3985 //
3986    // Reset the lock pool. Do not worry about the locks in the pool -- we will
3987    // free them when iterating through the lock table (it includes all the locks,
3988    // dead or alive).
3989 //
3990 __kmp_lock_pool = NULL;
3991
3992#define IS_CRITICAL(lck) \
3993 ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3994 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3995
3996 //
3997 // Loop through lock table, free all locks.
3998 //
3999 // Do not free item [0], it is reserved for lock tables list.
4000 //
4001 // FIXME - we are iterating through a list of (pointers to) objects of
4002 // type union kmp_user_lock, but we have no way of knowing whether the
4003 // base type is currently "pool" or whatever the global user lock type
4004 // is.
4005 //
4006 // We are relying on the fact that for all of the user lock types
4007 // (except "tas"), the first field in the lock struct is the "initialized"
4008 // field, which is set to the address of the lock object itself when
4009 // the lock is initialized. When the union is of type "pool", the
4010 // first field is a pointer to the next object in the free list, which
4011 // will not be the same address as the object itself.
4012 //
4013 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4014 // will fail for "pool" objects on the free list. This must happen as
4015 // the "location" field of real user locks overlaps the "index" field
4016 // of "pool" objects.
4017 //
4018 // It would be better to run through the free list, and remove all "pool"
4019 // objects from the lock table before executing this loop. However,
4020 // "pool" objects do not always have their index field set (only on
4021 // lin_32e), and I don't want to search the lock table for the address
4022 // of every "pool" object on the free list.
4023 //
4024 while ( __kmp_user_lock_table.used > 1 ) {
4025 const ident *loc;
4026
4027 //
4028        // Reduce __kmp_user_lock_table.used before freeing the lock,
4029        // so that the state of the locks stays consistent.
4030 //
4031 kmp_user_lock_p lck = __kmp_user_lock_table.table[
4032 --__kmp_user_lock_table.used ];
4033
4034 if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4035 ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4036 //
4037 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4038 // initialized AND it is NOT a critical section (user is not
4039 // responsible for destroying criticals) AND we know source
4040 // location to report.
4041 //
4042 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4043 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4044 ( loc->psource != NULL ) ) {
4045 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4046 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
4047 str_loc.line, str_loc.col );
4048 __kmp_str_loc_free( &str_loc);
4049 }
4050
4051#ifdef KMP_DEBUG
4052 if ( IS_CRITICAL( lck ) ) {
4053 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4054 }
4055 else {
4056 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4057 }
4058#endif // KMP_DEBUG
4059
4060 //
4061 // Cleanup internal lock dynamic resources
4062 // (for drdpa locks particularly).
4063 //
4064 __kmp_destroy_user_lock( lck );
4065 }
4066
4067 //
4068 // Free the lock if block allocation of locks is not used.
4069 //
4070 if ( __kmp_lock_blocks == NULL ) {
4071 __kmp_free( lck );
4072 }
4073 }
4074
4075#undef IS_CRITICAL
4076
4077 //
4078 // delete lock table(s).
4079 //
4080 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4081 __kmp_user_lock_table.table = NULL;
4082 __kmp_user_lock_table.allocated = 0;
4083
4084 while ( table_ptr != NULL ) {
4085 //
4086 // In the first element we saved the pointer to the previous
4087 // (smaller) lock table.
4088 //
4089 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4090 __kmp_free( table_ptr );
4091 table_ptr = next;
4092 }
4093
4094 //
4095 // Free buffers allocated for blocks of locks.
4096 //
4097 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4098 __kmp_lock_blocks = NULL;
4099
4100 while ( block_ptr != NULL ) {
4101 kmp_block_of_locks_t *next = block_ptr->next_block;
4102 __kmp_free( block_ptr->locks );
4103 //
4104 // *block_ptr itself was allocated at the end of the locks vector.
4105 //
4106 block_ptr = next;
4107 }
4108
4109 TCW_4(__kmp_init_user_locks, FALSE);
4110}
4111
Andrey Churbanov5c56fb52015-02-20 18:05:17 +00004112#endif // KMP_USE_DYNAMIC_LOCK