Intel® OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  * $Revision: 42810 $
4  * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include <stddef.h>
38 
39 #include "kmp.h"
40 #include "kmp_itt.h"
41 #include "kmp_i18n.h"
42 #include "kmp_lock.h"
43 #include "kmp_io.h"
44 
45 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
46 # include <unistd.h>
47 # include <sys/syscall.h>
48 // We should really include <futex.h>, but that causes compatibility problems on different
49 // Linux* OS distributions that either require that you include (or break when you try to include)
50 // <pci/types.h>.
51 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
52 // we just define the constants here and don't include <futex.h>
53 # ifndef FUTEX_WAIT
54 # define FUTEX_WAIT 0
55 # endif
56 # ifndef FUTEX_WAKE
57 # define FUTEX_WAKE 1
58 # endif
59 #endif
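
(Illustrative aside, not part of this file: these two constants are all that is needed to drive the kernel futex interface directly through syscall(2). The wrapper names below are hypothetical.)

    // Sleep only while *addr still holds 'expected'; return immediately otherwise.
    static void example_futex_wait( volatile kmp_int32 *addr, kmp_int32 expected )
    {
        syscall( __NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0 );
    }

    // Wake at most one thread currently parked in FUTEX_WAIT on addr.
    static void example_futex_wake_one( volatile kmp_int32 *addr )
    {
        syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
    }
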
60 
61 
62 #ifndef KMP_DEBUG
63 # define __kmp_static_delay( arg ) /* nothing to do */
64 #else
65 
66 static void
67 __kmp_static_delay( int arg )
68 {
69 /* Work around weird code-gen bug that causes assert to trip */
70 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
71  KMP_ASSERT( arg != 0 );
72 # else
73  KMP_ASSERT( arg >= 0 );
74 # endif
75 }
76 #endif /* KMP_DEBUG */
77 
78 static void
79 __kmp_static_yield( int arg )
80 {
81  __kmp_yield( arg );
82 }
83 
84 /* Implement spin locks for internal library use. */
85 /* The algorithm implemented is Lamport's bakery lock [1974]. */
86 
87 void
88 __kmp_validate_locks( void )
89 {
90  int i;
91  kmp_uint32 x, y;
92 
93  /* Check to make sure unsigned arithmetic wraps properly */
94  x = ~((kmp_uint32) 0) - 2;
95  y = x - 2;
96 
97  for (i = 0; i < 8; ++i, ++x, ++y) {
98  kmp_uint32 z = (x - y);
99  KMP_ASSERT( z == 2 );
100  }
101 
102  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
103 }
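
(A standalone worked example, not part of the runtime, of the wrap-around property this function verifies: 32-bit unsigned subtraction is performed modulo 2^32, so the difference stays 2 even while the larger operand wraps past zero.)

    #include <assert.h>
    #include <stdint.h>

    int main( void )
    {
        uint32_t x = 0xFFFFFFFFu;   // about to wrap
        uint32_t y = 0xFFFFFFFDu;
        ++x;                        // x wraps to 0x00000000
        ++y;                        // y becomes 0xFFFFFFFE
        assert( x - y == 2 );       // 0 - 0xFFFFFFFE == 2 (mod 2^32)
        return 0;
    }
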
104 
105 
106 /* ------------------------------------------------------------------------ */
107 /* test and set locks */
108 
109 //
110 // For the non-nested locks, we can only assume that the first 4 bytes were
111 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
112 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
113 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
114 //
115 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
116 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
117 //
118 
119 static kmp_int32
120 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
121 {
122  return TCR_4( lck->lk.poll ) - 1;
123 }
124 
125 static inline bool
126 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
127 {
128  return lck->lk.depth_locked != -1;
129 }
130 
131 __forceinline static void
132 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
133 {
134  KMP_MB();
135 
136 #ifdef USE_LOCK_PROFILE
137  kmp_uint32 curr = TCR_4( lck->lk.poll );
138  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
139  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
140  /* else __kmp_printf( "." );*/
141 #endif /* USE_LOCK_PROFILE */
142 
143  if ( ( lck->lk.poll == 0 )
144  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
145  KMP_FSYNC_ACQUIRED(lck);
146  return;
147  }
148 
149  kmp_uint32 spins;
150  KMP_FSYNC_PREPARE( lck );
151  KMP_INIT_YIELD( spins );
152  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
153  __kmp_xproc ) ) {
154  KMP_YIELD( TRUE );
155  }
156  else {
157  KMP_YIELD_SPIN( spins );
158  }
159 
160  while ( ( lck->lk.poll != 0 ) ||
161  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
162  //
163  // FIXME - use exponential backoff here
164  //
165  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
166  __kmp_xproc ) ) {
167  KMP_YIELD( TRUE );
168  }
169  else {
170  KMP_YIELD_SPIN( spins );
171  }
172  }
173  KMP_FSYNC_ACQUIRED( lck );
174 }
175 
176 void
177 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
178 {
179  __kmp_acquire_tas_lock_timed_template( lck, gtid );
180 }
181 
182 static void
183 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
184 {
185  if ( __kmp_env_consistency_check ) {
186  char const * const func = "omp_set_lock";
187  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
188  && __kmp_is_tas_lock_nestable( lck ) ) {
189  KMP_FATAL( LockNestableUsedAsSimple, func );
190  }
191  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
192  KMP_FATAL( LockIsAlreadyOwned, func );
193  }
194  }
195  __kmp_acquire_tas_lock( lck, gtid );
196 }
197 
198 int
199 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
200 {
201  if ( ( lck->lk.poll == 0 )
202  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
203  KMP_FSYNC_ACQUIRED( lck );
204  return TRUE;
205  }
206  return FALSE;
207 }
208 
209 static int
210 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
211 {
212  if ( __kmp_env_consistency_check ) {
213  char const * const func = "omp_test_lock";
214  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
215  && __kmp_is_tas_lock_nestable( lck ) ) {
216  KMP_FATAL( LockNestableUsedAsSimple, func );
217  }
218  }
219  return __kmp_test_tas_lock( lck, gtid );
220 }
221 
222 void
223 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
224 {
225  KMP_MB(); /* Flush all pending memory write invalidates. */
226 
227  KMP_FSYNC_RELEASING(lck);
228  KMP_ST_REL32( &(lck->lk.poll), 0 );
229 
230  KMP_MB(); /* Flush all pending memory write invalidates. */
231 
232  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
233  __kmp_xproc ) );
234 }
235 
236 static void
237 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
238 {
239  if ( __kmp_env_consistency_check ) {
240  char const * const func = "omp_unset_lock";
241  KMP_MB(); /* in case another processor initialized lock */
242  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
243  && __kmp_is_tas_lock_nestable( lck ) ) {
244  KMP_FATAL( LockNestableUsedAsSimple, func );
245  }
246  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
247  KMP_FATAL( LockUnsettingFree, func );
248  }
249  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
250  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
251  KMP_FATAL( LockUnsettingSetByAnother, func );
252  }
253  }
254  __kmp_release_tas_lock( lck, gtid );
255 }
256 
257 void
258 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
259 {
260  TCW_4( lck->lk.poll, 0 );
261 }
262 
263 static void
264 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
265 {
266  __kmp_init_tas_lock( lck );
267 }
268 
269 void
270 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
271 {
272  lck->lk.poll = 0;
273 }
274 
275 static void
276 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
277 {
278  if ( __kmp_env_consistency_check ) {
279  char const * const func = "omp_destroy_lock";
280  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
281  && __kmp_is_tas_lock_nestable( lck ) ) {
282  KMP_FATAL( LockNestableUsedAsSimple, func );
283  }
284  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
285  KMP_FATAL( LockStillOwned, func );
286  }
287  }
288  __kmp_destroy_tas_lock( lck );
289 }
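
(A usage sketch of the simple test-and-set lock API defined above; the global thread id chosen here is illustrative only.)

    kmp_tas_lock_t lock;
    kmp_int32      gtid = 0;                 // illustrative gtid

    __kmp_init_tas_lock( &lock );            // poll = 0: unlocked
    __kmp_acquire_tas_lock( &lock, gtid );   // spins until poll is CASed from 0 to gtid+1
    /* ... critical section ... */
    __kmp_release_tas_lock( &lock, gtid );   // stores 0 back with release semantics
    __kmp_destroy_tas_lock( &lock );
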
290 
291 
292 //
293 // nested test and set locks
294 //
295 
296 void
297 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
298 {
299  KMP_DEBUG_ASSERT( gtid >= 0 );
300 
301  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
302  lck->lk.depth_locked += 1;
303  }
304  else {
305  __kmp_acquire_tas_lock_timed_template( lck, gtid );
306  lck->lk.depth_locked = 1;
307  }
308 }
309 
310 static void
311 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
312 {
313  if ( __kmp_env_consistency_check ) {
314  char const * const func = "omp_set_nest_lock";
315  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
316  KMP_FATAL( LockSimpleUsedAsNestable, func );
317  }
318  }
319  __kmp_acquire_nested_tas_lock( lck, gtid );
320 }
321 
322 int
323 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
324 {
325  int retval;
326 
327  KMP_DEBUG_ASSERT( gtid >= 0 );
328 
329  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
330  retval = ++lck->lk.depth_locked;
331  }
332  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
333  retval = 0;
334  }
335  else {
336  KMP_MB();
337  retval = lck->lk.depth_locked = 1;
338  }
339  return retval;
340 }
341 
342 static int
343 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
344 {
345  if ( __kmp_env_consistency_check ) {
346  char const * const func = "omp_test_nest_lock";
347  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
348  KMP_FATAL( LockSimpleUsedAsNestable, func );
349  }
350  }
351  return __kmp_test_nested_tas_lock( lck, gtid );
352 }
353 
354 void
355 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
356 {
357  KMP_DEBUG_ASSERT( gtid >= 0 );
358 
359  KMP_MB();
360  if ( --(lck->lk.depth_locked) == 0 ) {
361  __kmp_release_tas_lock( lck, gtid );
362  }
363 }
364 
365 static void
366 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
367 {
368  if ( __kmp_env_consistency_check ) {
369  char const * const func = "omp_unset_nest_lock";
370  KMP_MB(); /* in case another processor initialized lock */
371  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
372  KMP_FATAL( LockSimpleUsedAsNestable, func );
373  }
374  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
375  KMP_FATAL( LockUnsettingFree, func );
376  }
377  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
378  KMP_FATAL( LockUnsettingSetByAnother, func );
379  }
380  }
381  __kmp_release_nested_tas_lock( lck, gtid );
382 }
383 
384 void
385 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
386 {
387  __kmp_init_tas_lock( lck );
388  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
389 }
390 
391 static void
392 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
393 {
394  __kmp_init_nested_tas_lock( lck );
395 }
396 
397 void
398 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
399 {
400  __kmp_destroy_tas_lock( lck );
401  lck->lk.depth_locked = 0;
402 }
403 
404 static void
405 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
406 {
407  if ( __kmp_env_consistency_check ) {
408  char const * const func = "omp_destroy_nest_lock";
409  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
410  KMP_FATAL( LockSimpleUsedAsNestable, func );
411  }
412  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
413  KMP_FATAL( LockStillOwned, func );
414  }
415  }
416  __kmp_destroy_nested_tas_lock( lck );
417 }
418 
419 
420 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
421 
422 /* ------------------------------------------------------------------------ */
423 /* futex locks */
424 
425 // futex locks are really just test and set locks, with a different method
426 // of handling contention. They take the same amount of space as test and
427 // set locks, and are allocated the same way (i.e. use the area allocated by
428 // the compiler for non-nested locks / allocate nested locks on the heap).
429 
430 static kmp_int32
431 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
432 {
433  return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
434 }
435 
436 static inline bool
437 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
438 {
439  return lck->lk.depth_locked != -1;
440 }
441 
442 __forceinline static void
443 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
444 {
445  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
446 
447  KMP_MB();
448 
449 #ifdef USE_LOCK_PROFILE
450  kmp_uint32 curr = TCR_4( lck->lk.poll );
451  if ( ( curr != 0 ) && ( curr != gtid_code ) )
452  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
453  /* else __kmp_printf( "." );*/
454 #endif /* USE_LOCK_PROFILE */
455 
456  KMP_FSYNC_PREPARE( lck );
457  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
458  lck, lck->lk.poll, gtid ) );
459 
460  kmp_int32 poll_val;
461  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
462  gtid_code ) ) != 0 ) {
463  kmp_int32 cond = poll_val & 1;
464  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
465  lck, gtid, poll_val, cond ) );
466 
467  //
468  // NOTE: if you try to use the following condition for this branch
469  //
470  // if ( poll_val & 1 == 0 )
471  //
472  // Then the 12.0 compiler has a bug where the following block will
473  // always be skipped, regardless of the value of the LSB of poll_val.
474  //
475  if ( ! cond ) {
476  //
477  // Try to set the lsb in the poll to indicate to the owner
478  // thread that they need to wake this thread up.
479  //
480  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
481  poll_val, poll_val | 1 ) ) {
482  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
483  lck, lck->lk.poll, gtid ) );
484  continue;
485  }
486  poll_val |= 1;
487 
488  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
489  lck, lck->lk.poll, gtid ) );
490  }
491 
492  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
493  lck, gtid, poll_val ) );
494 
495  kmp_int32 rc;
496  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
497  poll_val, NULL, NULL, 0 ) ) != 0 ) {
498  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
499  lck, gtid, poll_val, rc, errno ) );
500  continue;
501  }
502 
503  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
504  lck, gtid, poll_val ) );
505  //
506  // This thread has now done a successful futex wait call and was
507  // entered on the OS futex queue. We must now perform a futex
508  // wake call when releasing the lock, as we have no idea how many
509  // other threads are in the queue.
510  //
511  gtid_code |= 1;
512  }
513 
514  KMP_FSYNC_ACQUIRED( lck );
515  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
516  lck, lck->lk.poll, gtid ) );
517 }
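
(For clarity, the encoding of lck->lk.poll used by the acquire path above, with two hypothetical helpers; the owner query matches __kmp_get_futex_lock_owner() earlier in this file.)

    //   0                      unlocked
    //   (gtid + 1) << 1        held by gtid, no sleeping waiters
    //   ((gtid + 1) << 1) | 1  held by gtid, at least one waiter parked in FUTEX_WAIT,
    //                          so the releasing thread must issue a FUTEX_WAKE.
    static kmp_int32 example_futex_owner( kmp_int32 poll )       { return ( poll >> 1 ) - 1; }
    static int       example_futex_has_waiters( kmp_int32 poll ) { return poll & 1; }
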
518 
519 void
520 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
521 {
522  __kmp_acquire_futex_lock_timed_template( lck, gtid );
523 }
524 
525 static void
526 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
527 {
528  if ( __kmp_env_consistency_check ) {
529  char const * const func = "omp_set_lock";
530  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
531  && __kmp_is_futex_lock_nestable( lck ) ) {
532  KMP_FATAL( LockNestableUsedAsSimple, func );
533  }
534  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
535  KMP_FATAL( LockIsAlreadyOwned, func );
536  }
537  }
538  __kmp_acquire_futex_lock( lck, gtid );
539 }
540 
541 int
542 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
543 {
544  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
545  KMP_FSYNC_ACQUIRED( lck );
546  return TRUE;
547  }
548  return FALSE;
549 }
550 
551 static int
552 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
553 {
554  if ( __kmp_env_consistency_check ) {
555  char const * const func = "omp_test_lock";
556  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
557  && __kmp_is_futex_lock_nestable( lck ) ) {
558  KMP_FATAL( LockNestableUsedAsSimple, func );
559  }
560  }
561  return __kmp_test_futex_lock( lck, gtid );
562 }
563 
564 void
565 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
566 {
567  KMP_MB(); /* Flush all pending memory write invalidates. */
568 
569  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
570  lck, lck->lk.poll, gtid ) );
571 
572  KMP_FSYNC_RELEASING(lck);
573 
574  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
575 
576  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
577  lck, gtid, poll_val ) );
578 
579  if ( poll_val & 1 ) {
580  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
581  lck, gtid ) );
582  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
583  }
584 
585  KMP_MB(); /* Flush all pending memory write invalidates. */
586 
587  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
588  lck, lck->lk.poll, gtid ) );
589 
590  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
591  __kmp_xproc ) );
592 }
593 
594 static void
595 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
596 {
597  if ( __kmp_env_consistency_check ) {
598  char const * const func = "omp_unset_lock";
599  KMP_MB(); /* in case another processor initialized lock */
600  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
601  && __kmp_is_futex_lock_nestable( lck ) ) {
602  KMP_FATAL( LockNestableUsedAsSimple, func );
603  }
604  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
605  KMP_FATAL( LockUnsettingFree, func );
606  }
607  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
608  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
609  KMP_FATAL( LockUnsettingSetByAnother, func );
610  }
611  }
612  __kmp_release_futex_lock( lck, gtid );
613 }
614 
615 void
616 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
617 {
618  TCW_4( lck->lk.poll, 0 );
619 }
620 
621 static void
622 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
623 {
624  __kmp_init_futex_lock( lck );
625 }
626 
627 void
628 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
629 {
630  lck->lk.poll = 0;
631 }
632 
633 static void
634 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
635 {
636  if ( __kmp_env_consistency_check ) {
637  char const * const func = "omp_destroy_lock";
638  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
639  && __kmp_is_futex_lock_nestable( lck ) ) {
640  KMP_FATAL( LockNestableUsedAsSimple, func );
641  }
642  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
643  KMP_FATAL( LockStillOwned, func );
644  }
645  }
646  __kmp_destroy_futex_lock( lck );
647 }
648 
649 
650 //
651 // nested futex locks
652 //
653 
654 void
655 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
656 {
657  KMP_DEBUG_ASSERT( gtid >= 0 );
658 
659  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
660  lck->lk.depth_locked += 1;
661  }
662  else {
663  __kmp_acquire_futex_lock_timed_template( lck, gtid );
664  lck->lk.depth_locked = 1;
665  }
666 }
667 
668 static void
669 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
670 {
671  if ( __kmp_env_consistency_check ) {
672  char const * const func = "omp_set_nest_lock";
673  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
674  KMP_FATAL( LockSimpleUsedAsNestable, func );
675  }
676  }
677  __kmp_acquire_nested_futex_lock( lck, gtid );
678 }
679 
680 int
681 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
682 {
683  int retval;
684 
685  KMP_DEBUG_ASSERT( gtid >= 0 );
686 
687  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
688  retval = ++lck->lk.depth_locked;
689  }
690  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
691  retval = 0;
692  }
693  else {
694  KMP_MB();
695  retval = lck->lk.depth_locked = 1;
696  }
697  return retval;
698 }
699 
700 static int
701 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
702 {
703  if ( __kmp_env_consistency_check ) {
704  char const * const func = "omp_test_nest_lock";
705  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
706  KMP_FATAL( LockSimpleUsedAsNestable, func );
707  }
708  }
709  return __kmp_test_nested_futex_lock( lck, gtid );
710 }
711 
712 void
713 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
714 {
715  KMP_DEBUG_ASSERT( gtid >= 0 );
716 
717  KMP_MB();
718  if ( --(lck->lk.depth_locked) == 0 ) {
719  __kmp_release_futex_lock( lck, gtid );
720  }
721 }
722 
723 static void
724 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
725 {
726  if ( __kmp_env_consistency_check ) {
727  char const * const func = "omp_unset_nest_lock";
728  KMP_MB(); /* in case another processor initialized lock */
729  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
730  KMP_FATAL( LockSimpleUsedAsNestable, func );
731  }
732  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
733  KMP_FATAL( LockUnsettingFree, func );
734  }
735  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
736  KMP_FATAL( LockUnsettingSetByAnother, func );
737  }
738  }
739  __kmp_release_nested_futex_lock( lck, gtid );
740 }
741 
742 void
743 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
744 {
745  __kmp_init_futex_lock( lck );
746  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
747 }
748 
749 static void
750 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
751 {
752  __kmp_init_nested_futex_lock( lck );
753 }
754 
755 void
756 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
757 {
758  __kmp_destroy_futex_lock( lck );
759  lck->lk.depth_locked = 0;
760 }
761 
762 static void
763 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
764 {
765  if ( __kmp_env_consistency_check ) {
766  char const * const func = "omp_destroy_nest_lock";
767  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
768  KMP_FATAL( LockSimpleUsedAsNestable, func );
769  }
770  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
771  KMP_FATAL( LockStillOwned, func );
772  }
773  }
774  __kmp_destroy_nested_futex_lock( lck );
775 }
776 
777 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
778 
779 
780 /* ------------------------------------------------------------------------ */
781 /* ticket (bakery) locks */
782 
783 static kmp_int32
784 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
785 {
786  return TCR_4( lck->lk.owner_id ) - 1;
787 }
788 
789 static inline bool
790 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
791 {
792  return lck->lk.depth_locked != -1;
793 }
794 
795 static kmp_uint32
796 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
797 {
798  register kmp_uint32 pause;
799 
800  if (value == checker) {
801  return TRUE;
802  }
803  for (pause = checker - value; pause != 0; --pause) {
804  __kmp_static_delay(TRUE);
805  }
806  return FALSE;
807 }
808 
809 __forceinline static void
810 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
811 {
812  kmp_uint32 my_ticket;
813  KMP_MB();
814 
815  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
816 
817 #ifdef USE_LOCK_PROFILE
818  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
819  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
820  /* else __kmp_printf( "." );*/
821 #endif /* USE_LOCK_PROFILE */
822 
823  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
824  KMP_FSYNC_ACQUIRED(lck);
825  return;
826  }
827  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
828  KMP_FSYNC_ACQUIRED(lck);
829 }
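
(The same ticket discipline in a minimal self-contained form, written with C11 atomics instead of the KMP_* primitives; a sketch for clarity, not the runtime's implementation, which also yields and backs off via KMP_WAIT_YIELD.)

    #include <stdatomic.h>

    typedef struct {
        atomic_uint next_ticket;   // next ticket to hand out
        atomic_uint now_serving;   // ticket currently allowed to proceed
    } example_ticket_lock_t;

    static void example_ticket_acquire( example_ticket_lock_t *l )
    {
        unsigned my_ticket = atomic_fetch_add_explicit( &l->next_ticket, 1u,
                                                        memory_order_relaxed );
        while ( atomic_load_explicit( &l->now_serving, memory_order_acquire ) != my_ticket )
            ;   // spin until our ticket comes up
    }

    static void example_ticket_release( example_ticket_lock_t *l )
    {
        unsigned next = atomic_load_explicit( &l->now_serving, memory_order_relaxed ) + 1u;
        atomic_store_explicit( &l->now_serving, next, memory_order_release );
    }
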
830 
831 void
832 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
833 {
834  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
835 }
836 
837 static void
838 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
839 {
840  if ( __kmp_env_consistency_check ) {
841  char const * const func = "omp_set_lock";
842  if ( lck->lk.initialized != lck ) {
843  KMP_FATAL( LockIsUninitialized, func );
844  }
845  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
846  KMP_FATAL( LockNestableUsedAsSimple, func );
847  }
848  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
849  KMP_FATAL( LockIsAlreadyOwned, func );
850  }
851  }
852 
853  __kmp_acquire_ticket_lock( lck, gtid );
854 
855  if ( __kmp_env_consistency_check ) {
856  lck->lk.owner_id = gtid + 1;
857  }
858 }
859 
860 int
861 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
862 {
863  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
864  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
865  kmp_uint32 next_ticket = my_ticket + 1;
866  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
867  my_ticket, next_ticket ) ) {
868  KMP_FSYNC_ACQUIRED( lck );
869  return TRUE;
870  }
871  }
872  return FALSE;
873 }
874 
875 static int
876 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
877 {
878  if ( __kmp_env_consistency_check ) {
879  char const * const func = "omp_test_lock";
880  if ( lck->lk.initialized != lck ) {
881  KMP_FATAL( LockIsUninitialized, func );
882  }
883  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
884  KMP_FATAL( LockNestableUsedAsSimple, func );
885  }
886  }
887 
888  int retval = __kmp_test_ticket_lock( lck, gtid );
889 
890  if ( __kmp_env_consistency_check && retval ) {
891  lck->lk.owner_id = gtid + 1;
892  }
893  return retval;
894 }
895 
896 void
897 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
898 {
899  kmp_uint32 distance;
900 
901  KMP_MB(); /* Flush all pending memory write invalidates. */
902 
903  KMP_FSYNC_RELEASING(lck);
904  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
905 
906  KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
907 
908  KMP_MB(); /* Flush all pending memory write invalidates. */
909 
910  KMP_YIELD( distance
911  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
912 }
913 
914 static void
915 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
916 {
917  if ( __kmp_env_consistency_check ) {
918  char const * const func = "omp_unset_lock";
919  KMP_MB(); /* in case another processor initialized lock */
920  if ( lck->lk.initialized != lck ) {
921  KMP_FATAL( LockIsUninitialized, func );
922  }
923  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
924  KMP_FATAL( LockNestableUsedAsSimple, func );
925  }
926  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
927  KMP_FATAL( LockUnsettingFree, func );
928  }
929  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
930  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
931  KMP_FATAL( LockUnsettingSetByAnother, func );
932  }
933  lck->lk.owner_id = 0;
934  }
935  __kmp_release_ticket_lock( lck, gtid );
936 }
937 
938 void
939 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
940 {
941  lck->lk.location = NULL;
942  TCW_4( lck->lk.next_ticket, 0 );
943  TCW_4( lck->lk.now_serving, 0 );
944  lck->lk.owner_id = 0; // no thread owns the lock.
945  lck->lk.depth_locked = -1; // -1 => not a nested lock.
946  lck->lk.initialized = (kmp_ticket_lock *)lck;
947 }
948 
949 static void
950 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
951 {
952  __kmp_init_ticket_lock( lck );
953 }
954 
955 void
956 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
957 {
958  lck->lk.initialized = NULL;
959  lck->lk.location = NULL;
960  lck->lk.next_ticket = 0;
961  lck->lk.now_serving = 0;
962  lck->lk.owner_id = 0;
963  lck->lk.depth_locked = -1;
964 }
965 
966 static void
967 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
968 {
969  if ( __kmp_env_consistency_check ) {
970  char const * const func = "omp_destroy_lock";
971  if ( lck->lk.initialized != lck ) {
972  KMP_FATAL( LockIsUninitialized, func );
973  }
974  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
975  KMP_FATAL( LockNestableUsedAsSimple, func );
976  }
977  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
978  KMP_FATAL( LockStillOwned, func );
979  }
980  }
981  __kmp_destroy_ticket_lock( lck );
982 }
983 
984 
985 //
986 // nested ticket locks
987 //
988 
989 void
990 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
991 {
992  KMP_DEBUG_ASSERT( gtid >= 0 );
993 
994  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
995  lck->lk.depth_locked += 1;
996  }
997  else {
998  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
999  KMP_MB();
1000  lck->lk.depth_locked = 1;
1001  KMP_MB();
1002  lck->lk.owner_id = gtid + 1;
1003  }
1004 }
1005 
1006 static void
1007 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1008 {
1009  if ( __kmp_env_consistency_check ) {
1010  char const * const func = "omp_set_nest_lock";
1011  if ( lck->lk.initialized != lck ) {
1012  KMP_FATAL( LockIsUninitialized, func );
1013  }
1014  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1015  KMP_FATAL( LockSimpleUsedAsNestable, func );
1016  }
1017  }
1018  __kmp_acquire_nested_ticket_lock( lck, gtid );
1019 }
1020 
1021 int
1022 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1023 {
1024  int retval;
1025 
1026  KMP_DEBUG_ASSERT( gtid >= 0 );
1027 
1028  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
1029  retval = ++lck->lk.depth_locked;
1030  }
1031  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
1032  retval = 0;
1033  }
1034  else {
1035  KMP_MB();
1036  retval = lck->lk.depth_locked = 1;
1037  KMP_MB();
1038  lck->lk.owner_id = gtid + 1;
1039  }
1040  return retval;
1041 }
1042 
1043 static int
1044 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
1045  kmp_int32 gtid )
1046 {
1047  if ( __kmp_env_consistency_check ) {
1048  char const * const func = "omp_test_nest_lock";
1049  if ( lck->lk.initialized != lck ) {
1050  KMP_FATAL( LockIsUninitialized, func );
1051  }
1052  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1053  KMP_FATAL( LockSimpleUsedAsNestable, func );
1054  }
1055  }
1056  return __kmp_test_nested_ticket_lock( lck, gtid );
1057 }
1058 
1059 void
1060 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1061 {
1062  KMP_DEBUG_ASSERT( gtid >= 0 );
1063 
1064  KMP_MB();
1065  if ( --(lck->lk.depth_locked) == 0 ) {
1066  KMP_MB();
1067  lck->lk.owner_id = 0;
1068  __kmp_release_ticket_lock( lck, gtid );
1069  }
1070 }
1071 
1072 static void
1073 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1074 {
1075  if ( __kmp_env_consistency_check ) {
1076  char const * const func = "omp_unset_nest_lock";
1077  KMP_MB(); /* in case another processor initialized lock */
1078  if ( lck->lk.initialized != lck ) {
1079  KMP_FATAL( LockIsUninitialized, func );
1080  }
1081  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1082  KMP_FATAL( LockSimpleUsedAsNestable, func );
1083  }
1084  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1085  KMP_FATAL( LockUnsettingFree, func );
1086  }
1087  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1088  KMP_FATAL( LockUnsettingSetByAnother, func );
1089  }
1090  }
1091  __kmp_release_nested_ticket_lock( lck, gtid );
1092 }
1093 
1094 void
1095 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1096 {
1097  __kmp_init_ticket_lock( lck );
1098  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1099 }
1100 
1101 static void
1102 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1103 {
1104  __kmp_init_nested_ticket_lock( lck );
1105 }
1106 
1107 void
1108 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1109 {
1110  __kmp_destroy_ticket_lock( lck );
1111  lck->lk.depth_locked = 0;
1112 }
1113 
1114 static void
1115 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1116 {
1117  if ( __kmp_env_consistency_check ) {
1118  char const * const func = "omp_destroy_nest_lock";
1119  if ( lck->lk.initialized != lck ) {
1120  KMP_FATAL( LockIsUninitialized, func );
1121  }
1122  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1123  KMP_FATAL( LockSimpleUsedAsNestable, func );
1124  }
1125  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1126  KMP_FATAL( LockStillOwned, func );
1127  }
1128  }
1129  __kmp_destroy_nested_ticket_lock( lck );
1130 }
1131 
1132 
1133 //
1134 // access functions to fields which don't exist for all lock kinds.
1135 //
1136 
1137 static int
1138 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1139 {
1140  return lck == lck->lk.initialized;
1141 }
1142 
1143 static const ident_t *
1144 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1145 {
1146  return lck->lk.location;
1147 }
1148 
1149 static void
1150 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1151 {
1152  lck->lk.location = loc;
1153 }
1154 
1155 static kmp_lock_flags_t
1156 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1157 {
1158  return lck->lk.flags;
1159 }
1160 
1161 static void
1162 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1163 {
1164  lck->lk.flags = flags;
1165 }
1166 
1167 /* ------------------------------------------------------------------------ */
1168 /* queuing locks */
1169 
1170 /*
1171  * First the states
1172  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1173  *                UINT_MAX or -1, 0  means lock is held, nobody on queue
1174  *                h, h  means lock is held or about to transition, 1 element on queue
1175  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1176  *
1177  * Now the transitions
1178  *    Acquire(0,0)  = -1 ,0
1179  *    Release(0,0)  = Error
1180  *    Acquire(-1,0) =  h ,h    h > 0
1181  *    Release(-1,0) =  0 ,0
1182  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1183  *    Release(h,h)  = -1 ,0    h > 0
1184  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1185  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1186  *
1187  * And pictorially
1188  *
1189  *
1190  *          +-----+
1191  *          | 0, 0|------- release -------> Error
1192  *          +-----+
1193  *            |  ^
1194  *     acquire|  |release
1195  *            |  |
1196  *            |  |
1197  *            v  |
1198  *          +-----+
1199  *          |-1, 0|
1200  *          +-----+
1201  *            |  ^
1202  *     acquire|  |release
1203  *            |  |
1204  *            |  |
1205  *            v  |
1206  *          +-----+
1207  *          | h, h|
1208  *          +-----+
1209  *            |  ^
1210  *     acquire|  |release
1211  *            |  |
1212  *            |  |
1213  *            v  |
1214  *          +-----+
1215  *          | h, t|----- acquire, release loopback ---+
1216  *          +-----+                                   |
1217  *             ^                                      |
1218  *             |                                      |
1219  *             +--------------------------------------+
1220  *
1221  */
1222 
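
(A worked trace of the (head,tail) pair for three threads T1, T2, T3 with gtids 0, 1, 2, following the transitions above; queued threads are recorded as gtid+1, so they appear as 1, 2, 3. Illustrative only.)

    //   start                          (head,tail) = ( 0, 0)   unlocked, queue empty
    //   T1 acquires (fast path)                      (-1, 0)   T1 holds the lock, queue empty
    //   T2 acquires, enqueues itself                 ( 2, 2)   T1 still holds; T2 is the only waiter
    //   T3 acquires, enqueues itself                 ( 2, 3)   T2 at the head, T3 at the tail
    //   T1 releases, dequeues T2                     ( 3, 3)   T2 now holds; T3 is the only waiter
    //   T2 releases, dequeues T3                     (-1, 0)   T3 now holds, queue empty
    //   T3 releases                                  ( 0, 0)   unlocked again
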
1223 #ifdef DEBUG_QUEUING_LOCKS
1224 
1225 /* Stuff for circular trace buffer */
1226 #define TRACE_BUF_ELE 1024
1227 static char traces[TRACE_BUF_ELE][128] = { 0 };
1228 static int tc = 0;
1229 #define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1230 #define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1231 #define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1232 
1233 static void
1234 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1235  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1236 {
1237  kmp_int32 t, i;
1238 
1239  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1240 
1241  i = tc % TRACE_BUF_ELE;
1242  __kmp_printf_no_lock( "%s\n", traces[i] );
1243  i = (i+1) % TRACE_BUF_ELE;
1244  while ( i != (tc % TRACE_BUF_ELE) ) {
1245  __kmp_printf_no_lock( "%s", traces[i] );
1246  i = (i+1) % TRACE_BUF_ELE;
1247  }
1248  __kmp_printf_no_lock( "\n" );
1249 
1250  __kmp_printf_no_lock(
1251  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1252  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1253  head_id, tail_id );
1254 
1255  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1256 
1257  if ( lck->lk.head_id >= 1 ) {
1258  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1259  while (t > 0) {
1260  __kmp_printf_no_lock( "-> %d ", t );
1261  t = __kmp_threads[t-1]->th.th_next_waiting;
1262  }
1263  }
1264  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1265  __kmp_printf_no_lock( "\n\n" );
1266 }
1267 
1268 #endif /* DEBUG_QUEUING_LOCKS */
1269 
1270 static kmp_int32
1271 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1272 {
1273  return TCR_4( lck->lk.owner_id ) - 1;
1274 }
1275 
1276 static inline bool
1277 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1278 {
1279  return lck->lk.depth_locked != -1;
1280 }
1281 
1282 /* Acquire a lock using the queuing lock implementation */
1283 template <bool takeTime>
1284 /* [TLW] The unused template above is left behind because of what BEB believes is a
1285  potential compiler problem with __forceinline. */
1286 __forceinline static void
1287 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1288  kmp_int32 gtid )
1289 {
1290  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1291  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1292  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1293  volatile kmp_uint32 *spin_here_p;
1294  kmp_int32 need_mf = 1;
1295 
1296  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1297 
1298  KMP_FSYNC_PREPARE( lck );
1299  KMP_DEBUG_ASSERT( this_thr != NULL );
1300  spin_here_p = & this_thr->th.th_spin_here;
1301 
1302 #ifdef DEBUG_QUEUING_LOCKS
1303  TRACE_LOCK( gtid+1, "acq ent" );
1304  if ( *spin_here_p )
1305  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1306  if ( this_thr->th.th_next_waiting != 0 )
1307  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1308 #endif
1309  KMP_DEBUG_ASSERT( !*spin_here_p );
1310  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1311 
1312 
1313  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1314  that may follow, not just in execution order, but also in visibility order. This way,
1315  when a releasing thread observes the changes to the queue by this thread, it can
1316  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1317  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1318  to FALSE before this thread sets it to TRUE, this thread will hang.
1319  */
1320  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1321 
1322  while( 1 ) {
1323  kmp_int32 enqueued;
1324  kmp_int32 head;
1325  kmp_int32 tail;
1326 
1327  head = *head_id_p;
1328 
1329  switch ( head ) {
1330 
1331  case -1:
1332  {
1333 #ifdef DEBUG_QUEUING_LOCKS
1334  tail = *tail_id_p;
1335  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1336 #endif
1337  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1338  this assignment prevents us from entering the if ( t > 0 )
1339  condition in the enqueued case below, which is not necessary for
1340  this state transition */
1341 
1342  need_mf = 0;
1343  /* try (-1,0)->(tid,tid) */
1344  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1345  KMP_PACK_64( -1, 0 ),
1346  KMP_PACK_64( gtid+1, gtid+1 ) );
1347 #ifdef DEBUG_QUEUING_LOCKS
1348  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1349 #endif
1350  }
1351  break;
1352 
1353  default:
1354  {
1355  tail = *tail_id_p;
1356  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1357 
1358 #ifdef DEBUG_QUEUING_LOCKS
1359  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1360 #endif
1361 
1362  if ( tail == 0 ) {
1363  enqueued = FALSE;
1364  }
1365  else {
1366  need_mf = 0;
1367  /* try (h,t) or (h,h)->(h,tid) */
1368  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1369 
1370 #ifdef DEBUG_QUEUING_LOCKS
1371  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1372 #endif
1373  }
1374  }
1375  break;
1376 
1377  case 0: /* empty queue */
1378  {
1379  kmp_int32 grabbed_lock;
1380 
1381 #ifdef DEBUG_QUEUING_LOCKS
1382  tail = *tail_id_p;
1383  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1384 #endif
1385  /* try (0,0)->(-1,0) */
1386 
1387  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1388  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1389 
1390  if ( grabbed_lock ) {
1391 
1392  *spin_here_p = FALSE;
1393 
1394  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1395  lck, gtid ));
1396 #ifdef DEBUG_QUEUING_LOCKS
1397  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1398 #endif
1399  KMP_FSYNC_ACQUIRED( lck );
1400  return; /* lock holder cannot be on queue */
1401  }
1402  enqueued = FALSE;
1403  }
1404  break;
1405  }
1406 
1407  if ( enqueued ) {
1408  if ( tail > 0 ) {
1409  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1410  KMP_ASSERT( tail_thr != NULL );
1411  tail_thr->th.th_next_waiting = gtid+1;
1412  /* corresponding wait for this write in release code */
1413  }
1414  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1415 
1416 
1417  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1418  * throughput only here.
1419  */
1420  KMP_MB();
1421  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1422 
1423 #ifdef DEBUG_QUEUING_LOCKS
1424  TRACE_LOCK( gtid+1, "acq spin" );
1425 
1426  if ( this_thr->th.th_next_waiting != 0 )
1427  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1428 #endif
1429  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1430  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1431  lck, gtid ));
1432 
1433 #ifdef DEBUG_QUEUING_LOCKS
1434  TRACE_LOCK( gtid+1, "acq exit 2" );
1435 #endif
1436  /* got lock, we were dequeued by the thread that released lock */
1437  return;
1438  }
1439 
1440  /* Yield if number of threads > number of logical processors */
1441  /* ToDo: Not sure why this should only be in oversubscription case,
1442  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1443  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1444  __kmp_xproc ) );
1445 #ifdef DEBUG_QUEUING_LOCKS
1446  TRACE_LOCK( gtid+1, "acq retry" );
1447 #endif
1448 
1449  }
1450  KMP_ASSERT2( 0, "should not get here" );
1451 }
1452 
1453 void
1454 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1455 {
1456  KMP_DEBUG_ASSERT( gtid >= 0 );
1457 
1458  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1459 }
1460 
1461 static void
1462 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1463  kmp_int32 gtid )
1464 {
1465  if ( __kmp_env_consistency_check ) {
1466  char const * const func = "omp_set_lock";
1467  if ( lck->lk.initialized != lck ) {
1468  KMP_FATAL( LockIsUninitialized, func );
1469  }
1470  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1471  KMP_FATAL( LockNestableUsedAsSimple, func );
1472  }
1473  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1474  KMP_FATAL( LockIsAlreadyOwned, func );
1475  }
1476  }
1477 
1478  __kmp_acquire_queuing_lock( lck, gtid );
1479 
1480  if ( __kmp_env_consistency_check ) {
1481  lck->lk.owner_id = gtid + 1;
1482  }
1483 }
1484 
1485 int
1486 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1487 {
1488  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1489  kmp_int32 head;
1490 #ifdef KMP_DEBUG
1491  kmp_info_t *this_thr;
1492 #endif
1493 
1494  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1495  KMP_DEBUG_ASSERT( gtid >= 0 );
1496 #ifdef KMP_DEBUG
1497  this_thr = __kmp_thread_from_gtid( gtid );
1498  KMP_DEBUG_ASSERT( this_thr != NULL );
1499  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1500 #endif
1501 
1502  head = *head_id_p;
1503 
1504  if ( head == 0 ) { /* nobody on queue, nobody holding */
1505 
1506  /* try (0,0)->(-1,0) */
1507 
1508  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1509  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1510  KMP_FSYNC_ACQUIRED(lck);
1511  return TRUE;
1512  }
1513  }
1514 
1515  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1516  return FALSE;
1517 }
1518 
1519 static int
1520 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1521 {
1522  if ( __kmp_env_consistency_check ) {
1523  char const * const func = "omp_test_lock";
1524  if ( lck->lk.initialized != lck ) {
1525  KMP_FATAL( LockIsUninitialized, func );
1526  }
1527  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1528  KMP_FATAL( LockNestableUsedAsSimple, func );
1529  }
1530  }
1531 
1532  int retval = __kmp_test_queuing_lock( lck, gtid );
1533 
1534  if ( __kmp_env_consistency_check && retval ) {
1535  lck->lk.owner_id = gtid + 1;
1536  }
1537  return retval;
1538 }
1539 
1540 void
1541 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1542 {
1543  register kmp_info_t *this_thr;
1544  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1545  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1546 
1547  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1548  KMP_DEBUG_ASSERT( gtid >= 0 );
1549  this_thr = __kmp_thread_from_gtid( gtid );
1550  KMP_DEBUG_ASSERT( this_thr != NULL );
1551 #ifdef DEBUG_QUEUING_LOCKS
1552  TRACE_LOCK( gtid+1, "rel ent" );
1553 
1554  if ( this_thr->th.th_spin_here )
1555  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1556  if ( this_thr->th.th_next_waiting != 0 )
1557  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1558 #endif
1559  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1560  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1561 
1562  KMP_FSYNC_RELEASING(lck);
1563 
1564  while( 1 ) {
1565  kmp_int32 dequeued;
1566  kmp_int32 head;
1567  kmp_int32 tail;
1568 
1569  head = *head_id_p;
1570 
1571 #ifdef DEBUG_QUEUING_LOCKS
1572  tail = *tail_id_p;
1573  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1574  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1575 #endif
1576  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1577 
1578  if ( head == -1 ) { /* nobody on queue */
1579 
1580  /* try (-1,0)->(0,0) */
1581  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1582  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1583  lck, gtid ));
1584 #ifdef DEBUG_QUEUING_LOCKS
1585  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1586 #endif
1587  return;
1588  }
1589  dequeued = FALSE;
1590 
1591  }
1592  else {
1593 
1594  tail = *tail_id_p;
1595  if ( head == tail ) { /* only one thread on the queue */
1596 
1597 #ifdef DEBUG_QUEUING_LOCKS
1598  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1599 #endif
1600  KMP_DEBUG_ASSERT( head > 0 );
1601 
1602  /* try (h,h)->(-1,0) */
1603  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1604  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1605 #ifdef DEBUG_QUEUING_LOCKS
1606  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1607 #endif
1608 
1609  }
1610  else {
1611  volatile kmp_int32 *waiting_id_p;
1612  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1613  KMP_DEBUG_ASSERT( head_thr != NULL );
1614  waiting_id_p = & head_thr->th.th_next_waiting;
1615 
1616  /* Does this require synchronous reads? */
1617 #ifdef DEBUG_QUEUING_LOCKS
1618  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1619 #endif
1620  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1621 
1622  /* try (h,t)->(h',t) or (t,t) */
1623 
1624  KMP_MB();
1625  /* make sure enqueuing thread has time to update next waiting thread field */
1626  *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1627 #ifdef DEBUG_QUEUING_LOCKS
1628  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1629 #endif
1630  dequeued = TRUE;
1631  }
1632  }
1633 
1634  if ( dequeued ) {
1635  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1636  KMP_DEBUG_ASSERT( head_thr != NULL );
1637 
1638  /* Does this require synchronous reads? */
1639 #ifdef DEBUG_QUEUING_LOCKS
1640  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1641 #endif
1642  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1643 
1644  /* For clean code only.
1645  * Thread not released until next statement prevents race with acquire code.
1646  */
1647  head_thr->th.th_next_waiting = 0;
1648 #ifdef DEBUG_QUEUING_LOCKS
1649  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1650 #endif
1651 
1652  KMP_MB();
1653  /* reset spin value */
1654  head_thr->th.th_spin_here = FALSE;
1655 
1656  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1657  lck, gtid ));
1658 #ifdef DEBUG_QUEUING_LOCKS
1659  TRACE_LOCK( gtid+1, "rel exit 2" );
1660 #endif
1661  return;
1662  }
1663  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1664 
1665 #ifdef DEBUG_QUEUING_LOCKS
1666  TRACE_LOCK( gtid+1, "rel retry" );
1667 #endif
1668 
1669  } /* while */
1670  KMP_ASSERT2( 0, "should not get here" );
1671 }
1672 
1673 static void
1674 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1675  kmp_int32 gtid )
1676 {
1677  if ( __kmp_env_consistency_check ) {
1678  char const * const func = "omp_unset_lock";
1679  KMP_MB(); /* in case another processor initialized lock */
1680  if ( lck->lk.initialized != lck ) {
1681  KMP_FATAL( LockIsUninitialized, func );
1682  }
1683  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1684  KMP_FATAL( LockNestableUsedAsSimple, func );
1685  }
1686  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1687  KMP_FATAL( LockUnsettingFree, func );
1688  }
1689  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1690  KMP_FATAL( LockUnsettingSetByAnother, func );
1691  }
1692  lck->lk.owner_id = 0;
1693  }
1694  __kmp_release_queuing_lock( lck, gtid );
1695 }
1696 
1697 void
1698 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1699 {
1700  lck->lk.location = NULL;
1701  lck->lk.head_id = 0;
1702  lck->lk.tail_id = 0;
1703  lck->lk.next_ticket = 0;
1704  lck->lk.now_serving = 0;
1705  lck->lk.owner_id = 0; // no thread owns the lock.
1706  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1707  lck->lk.initialized = lck;
1708 
1709  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1710 }
1711 
1712 static void
1713 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1714 {
1715  __kmp_init_queuing_lock( lck );
1716 }
1717 
1718 void
1719 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1720 {
1721  lck->lk.initialized = NULL;
1722  lck->lk.location = NULL;
1723  lck->lk.head_id = 0;
1724  lck->lk.tail_id = 0;
1725  lck->lk.next_ticket = 0;
1726  lck->lk.now_serving = 0;
1727  lck->lk.owner_id = 0;
1728  lck->lk.depth_locked = -1;
1729 }
1730 
1731 static void
1732 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1733 {
1734  if ( __kmp_env_consistency_check ) {
1735  char const * const func = "omp_destroy_lock";
1736  if ( lck->lk.initialized != lck ) {
1737  KMP_FATAL( LockIsUninitialized, func );
1738  }
1739  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1740  KMP_FATAL( LockNestableUsedAsSimple, func );
1741  }
1742  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1743  KMP_FATAL( LockStillOwned, func );
1744  }
1745  }
1746  __kmp_destroy_queuing_lock( lck );
1747 }
1748 
1749 
1750 //
1751 // nested queuing locks
1752 //
1753 
1754 void
1755 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1756 {
1757  KMP_DEBUG_ASSERT( gtid >= 0 );
1758 
1759  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1760  lck->lk.depth_locked += 1;
1761  }
1762  else {
1763  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1764  KMP_MB();
1765  lck->lk.depth_locked = 1;
1766  KMP_MB();
1767  lck->lk.owner_id = gtid + 1;
1768  }
1769 }
1770 
1771 static void
1772 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1773 {
1774  if ( __kmp_env_consistency_check ) {
1775  char const * const func = "omp_set_nest_lock";
1776  if ( lck->lk.initialized != lck ) {
1777  KMP_FATAL( LockIsUninitialized, func );
1778  }
1779  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1780  KMP_FATAL( LockSimpleUsedAsNestable, func );
1781  }
1782  }
1783  __kmp_acquire_nested_queuing_lock( lck, gtid );
1784 }
1785 
1786 int
1787 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1788 {
1789  int retval;
1790 
1791  KMP_DEBUG_ASSERT( gtid >= 0 );
1792 
1793  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1794  retval = ++lck->lk.depth_locked;
1795  }
1796  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1797  retval = 0;
1798  }
1799  else {
1800  KMP_MB();
1801  retval = lck->lk.depth_locked = 1;
1802  KMP_MB();
1803  lck->lk.owner_id = gtid + 1;
1804  }
1805  return retval;
1806 }
1807 
1808 static int
1809 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1810  kmp_int32 gtid )
1811 {
1812  if ( __kmp_env_consistency_check ) {
1813  char const * const func = "omp_test_nest_lock";
1814  if ( lck->lk.initialized != lck ) {
1815  KMP_FATAL( LockIsUninitialized, func );
1816  }
1817  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1818  KMP_FATAL( LockSimpleUsedAsNestable, func );
1819  }
1820  }
1821  return __kmp_test_nested_queuing_lock( lck, gtid );
1822 }
1823 
1824 void
1825 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1826 {
1827  KMP_DEBUG_ASSERT( gtid >= 0 );
1828 
1829  KMP_MB();
1830  if ( --(lck->lk.depth_locked) == 0 ) {
1831  KMP_MB();
1832  lck->lk.owner_id = 0;
1833  __kmp_release_queuing_lock( lck, gtid );
1834  }
1835 }
1836 
1837 static void
1838 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1839 {
1840  if ( __kmp_env_consistency_check ) {
1841  char const * const func = "omp_unset_nest_lock";
1842  KMP_MB(); /* in case another processor initialized lock */
1843  if ( lck->lk.initialized != lck ) {
1844  KMP_FATAL( LockIsUninitialized, func );
1845  }
1846  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1847  KMP_FATAL( LockSimpleUsedAsNestable, func );
1848  }
1849  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1850  KMP_FATAL( LockUnsettingFree, func );
1851  }
1852  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1853  KMP_FATAL( LockUnsettingSetByAnother, func );
1854  }
1855  }
1856  __kmp_release_nested_queuing_lock( lck, gtid );
1857 }
1858 
1859 void
1860 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1861 {
1862  __kmp_init_queuing_lock( lck );
1863  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1864 }
1865 
1866 static void
1867 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1868 {
1869  __kmp_init_nested_queuing_lock( lck );
1870 }
1871 
1872 void
1873 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1874 {
1875  __kmp_destroy_queuing_lock( lck );
1876  lck->lk.depth_locked = 0;
1877 }
1878 
1879 static void
1880 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1881 {
1882  if ( __kmp_env_consistency_check ) {
1883  char const * const func = "omp_destroy_nest_lock";
1884  if ( lck->lk.initialized != lck ) {
1885  KMP_FATAL( LockIsUninitialized, func );
1886  }
1887  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1888  KMP_FATAL( LockSimpleUsedAsNestable, func );
1889  }
1890  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1891  KMP_FATAL( LockStillOwned, func );
1892  }
1893  }
1894  __kmp_destroy_nested_queuing_lock( lck );
1895 }
1896 
1897 
1898 //
1899 // access functions to fields which don't exist for all lock kinds.
1900 //
1901 
1902 static int
1903 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1904 {
1905  return lck == lck->lk.initialized;
1906 }
1907 
1908 static const ident_t *
1909 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1910 {
1911  return lck->lk.location;
1912 }
1913 
1914 static void
1915 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1916 {
1917  lck->lk.location = loc;
1918 }
1919 
1920 static kmp_lock_flags_t
1921 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1922 {
1923  return lck->lk.flags;
1924 }
1925 
1926 static void
1927 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1928 {
1929  lck->lk.flags = flags;
1930 }
1931 
1932 #if KMP_USE_ADAPTIVE_LOCKS
1933 
1934 /*
1935  RTM Adaptive locks
1936 */
1937 
1938 // TODO: Use the <immintrin.h> header for the intrinsics below once compiler 13.0 is available.
1939 //#include <immintrin.h>
1940 
1941 // Values from the status register after failed speculation.
1942 #define _XBEGIN_STARTED (~0u)
1943 #define _XABORT_EXPLICIT (1 << 0)
1944 #define _XABORT_RETRY (1 << 1)
1945 #define _XABORT_CONFLICT (1 << 2)
1946 #define _XABORT_CAPACITY (1 << 3)
1947 #define _XABORT_DEBUG (1 << 4)
1948 #define _XABORT_NESTED (1 << 5)
1949 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1950 
1951 // Aborts for which it's worth trying again immediately
1952 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1953 
1954 #define STRINGIZE_INTERNAL(arg) #arg
1955 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1956 
1957 // Access to RTM instructions
1958 
1959 /*
1960  A version of XBegin which returns -1 when speculation starts successfully, and the value of EAX on an abort.
1961  This is the same definition as the compiler intrinsic that will be supported at some point.
1962 */
1963 static __inline int _xbegin()
1964 {
1965  int res = -1;
1966 
1967 #if KMP_OS_WINDOWS
1968 #if KMP_ARCH_X86_64
1969  _asm {
1970  _emit 0xC7
1971  _emit 0xF8
1972  _emit 2
1973  _emit 0
1974  _emit 0
1975  _emit 0
1976  jmp L2
1977  mov res, eax
1978  L2:
1979  }
1980 #else /* IA32 */
1981  _asm {
1982  _emit 0xC7
1983  _emit 0xF8
1984  _emit 2
1985  _emit 0
1986  _emit 0
1987  _emit 0
1988  jmp L2
1989  mov res, eax
1990  L2:
1991  }
1992 #endif // KMP_ARCH_X86_64
1993 #else
1994  /* Note that %eax must be noted as killed (clobbered), because
1995  * the XSR is returned in %eax(%rax) on abort. Other register
1996  * values are restored, so don't need to be killed.
1997  *
1998  * We must also mark 'res' as an input and an output, since otherwise
1999  * 'res=-1' may be dropped as being dead, whereas we do need the
2000  * assignment on the successful (i.e., non-abort) path.
2001  */
2002  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
2003  " .long 1f-1b-6\n"
2004  " jmp 2f\n"
2005  "1: movl %%eax,%0\n"
2006  "2:"
2007  :"+r"(res)::"memory","%eax");
2008 #endif // KMP_OS_WINDOWS
2009  return res;
2010 }
2011 
2012 /*
2013  Transaction end
2014 */
2015 static __inline void _xend()
2016 {
2017 #if KMP_OS_WINDOWS
2018  __asm {
2019  _emit 0x0f
2020  _emit 0x01
2021  _emit 0xd5
2022  }
2023 #else
2024  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2025 #endif
2026 }
2027 
2028 /*
2029  This is a macro, the argument must be a single byte constant which
2030  can be evaluated by the inline assembler, since it is emitted as a
2031  byte into the assembly code.
2032 */
2033 #if KMP_OS_WINDOWS
2034 #define _xabort(ARG) \
2035  _asm _emit 0xc6 \
2036  _asm _emit 0xf8 \
2037  _asm _emit ARG
2038 #else
2039 #define _xabort(ARG) \
2040  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2041 #endif
2042 
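/*
   Illustrative sketch only (not the library's code): how the _xbegin/_xend/
   _xabort wrappers and SOFT_ABORT_MASK above fit together. The helper below
   is hypothetical; it commits an empty transaction, whereas the real retry
   policy lives in __kmp_test_adaptive_lock_only() further down.
*/
#if 0
static int
example_try_transaction( volatile kmp_uint32 *guard )
{
    kmp_uint32 status = _xbegin();
    if ( status == _XBEGIN_STARTED ) {
        if ( *guard != 0 ) {       // bring the guard into the read-set
            _xabort(0x01)          // explicit abort; rolls back to _xbegin
        }
        _xend();                   // commit the (empty) transaction
        return 1;                  // speculation succeeded
    }
    // A soft abort (retry/conflict/explicit) may be worth retrying
    // immediately; anything else (e.g. a capacity abort) is not.
    return ( status & SOFT_ABORT_MASK ) ? -1 : 0;
}
#endif
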
2043 //
2044 // Statistics are collected for testing purposes.
2045 //
2046 #if KMP_DEBUG_ADAPTIVE_LOCKS
2047 
2048 // We accumulate speculative lock statistics when the lock is destroyed.
2049 // We keep locks that haven't been destroyed in the liveLocks list
2050 // so that we can grab their statistics too.
2051 static kmp_adaptive_lock_statistics_t destroyedStats;
2052 
2053 // To hold the list of live locks.
2054 static kmp_adaptive_lock_t liveLocks;
2055 
2056 // A lock so we can safely update the list of locks.
2057 static kmp_bootstrap_lock_t chain_lock;
2058 
2059 // Initialize the list of stats.
2060 void
2061 __kmp_init_speculative_stats()
2062 {
2063  kmp_adaptive_lock *lck = &liveLocks;
2064 
2065  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2066  lck->stats.next = lck;
2067  lck->stats.prev = lck;
2068 
2069  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071 
2072  __kmp_init_bootstrap_lock( &chain_lock );
2073 
2074 }
2075 
2076 // Insert the lock into the circular list
2077 static void
2078 __kmp_remember_lock( kmp_adaptive_lock * lck )
2079 {
2080  __kmp_acquire_bootstrap_lock( &chain_lock );
2081 
2082  lck->stats.next = liveLocks.stats.next;
2083  lck->stats.prev = &liveLocks;
2084 
2085  liveLocks.stats.next = lck;
2086  lck->stats.next->stats.prev = lck;
2087 
2088  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2089  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2090 
2091  __kmp_release_bootstrap_lock( &chain_lock );
2092 }
2093 
2094 static void
2095 __kmp_forget_lock( kmp_adaptive_lock * lck )
2096 {
2097  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2098  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2099 
2100  kmp_adaptive_lock * n = lck->stats.next;
2101  kmp_adaptive_lock * p = lck->stats.prev;
2102 
2103  n->stats.prev = p;
2104  p->stats.next = n;
2105 }
2106 
2107 static void
2108 __kmp_zero_speculative_stats( kmp_adaptive_lock * lck )
2109 {
2110  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2111  __kmp_remember_lock( lck );
2112 }
2113 
2114 static void
2115 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_t * lck )
2116 {
2117  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2118 
2119  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2120  t->successfulSpeculations += s->successfulSpeculations;
2121  t->hardFailedSpeculations += s->hardFailedSpeculations;
2122  t->softFailedSpeculations += s->softFailedSpeculations;
2123  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2124  t->lemmingYields += s->lemmingYields;
2125 }
2126 
2127 static void
2128 __kmp_accumulate_speculative_stats( kmp_adaptive_lock * lck)
2129 {
2130  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2131 
2132  __kmp_acquire_bootstrap_lock( &chain_lock );
2133 
2134  __kmp_add_stats( &destroyedStats, lck );
2135  __kmp_forget_lock( lck );
2136 
2137  __kmp_release_bootstrap_lock( &chain_lock );
2138 }
2139 
2140 static float
2141 percent (kmp_uint32 count, kmp_uint32 total)
2142 {
2143  return (total == 0) ? 0.0: (100.0 * count)/total;
2144 }
2145 
2146 static
2147 FILE * __kmp_open_stats_file()
2148 {
2149  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2150  return stdout;
2151 
2152  size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2153  char buffer[buffLen];
2154  snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, getpid());
2155  FILE * result = fopen(&buffer[0], "w");
2156 
2157  // Maybe we should issue a warning here...
2158  return result ? result : stdout;
2159 }
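
// Note (descriptive, based on the snprintf() call above): the value of
// __kmp_speculative_statsfile is used as a printf-style format with the
// current pid as the argument, so a name such as "stats.%d" would expand to
// a per-process file, while "-" sends the statistics to stdout instead.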
2160 
2161 void
2162 __kmp_print_speculative_stats()
2163 {
2164  if (__kmp_user_lock_kind != lk_adaptive)
2165  return;
2166 
2167  FILE * statsFile = __kmp_open_stats_file();
2168 
2169  kmp_adaptive_lock_statistics_t total = destroyedStats;
2170  kmp_adaptive_lock *lck;
2171 
2172  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2173  __kmp_add_stats( &total, lck );
2174  }
2175  kmp_adaptive_lock_statistics_t *t = &total;
2176  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2177  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2178  t->softFailedSpeculations;
2179 
2180  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2181  fprintf ( statsFile, " Lock parameters: \n"
2182  " max_soft_retries : %10d\n"
2183  " max_badness : %10d\n",
2184  __kmp_adaptive_backoff_params.max_soft_retries,
2185  __kmp_adaptive_backoff_params.max_badness);
2186  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2187  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2188  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2189  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2190  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2191  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2192  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2193 
2194  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2195  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2196  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2197  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2198  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2199  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2200  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2201 
2202  if (statsFile != stdout)
2203  fclose( statsFile );
2204 }
2205 
2206 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2207 #else
2208 # define KMP_INC_STAT(lck,stat)
2209 
2210 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2211 
2212 static inline bool
2213 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2214 {
2215  // It is enough to check that the head_id is zero.
2216  // We don't also need to check the tail.
2217  bool res = lck->lk.head_id == 0;
2218 
2219  // We need a fence here, since we must ensure that no memory operations
2220  // from later in this thread float above that read.
2221 #if KMP_COMPILER_ICC
2222  _mm_mfence();
2223 #else
2224  __sync_synchronize();
2225 #endif
2226 
2227  return res;
2228 }
2229 
2230 // Functions for manipulating the badness
2231 static __inline void
2232 __kmp_update_badness_after_success( kmp_queuing_lock_t *lck )
2233 {
2234  // Reset the badness to zero so we eagerly try to speculate again
2235  lck->lk.adaptive.badness = 0;
2236  KMP_INC_STAT(lck,successfulSpeculations);
2237 }
2238 
2239 // Create a bit mask with one more set bit.
2240 static __inline void
2241 __kmp_step_badness( kmp_queuing_lock_t *lck )
2242 {
2243  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2244  if ( newBadness > lck->lk.adaptive.max_badness) {
2245  return;
2246  } else {
2247  lck->lk.adaptive.badness = newBadness;
2248  }
2249 }
2250 
2251 // Check whether speculation should be attempted.
2252 static __inline int
2253 __kmp_should_speculate( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2254 {
2255  kmp_uint32 badness = lck->lk.adaptive.badness;
2256  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2257  int res = (attempts & badness) == 0;
2258  return res;
2259 }
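
// Worked example (illustrative): after two failed speculation rounds the
// badness mask is 0x3 (see __kmp_step_badness above), so speculation is only
// attempted when the low two bits of acquire_attempts are zero, i.e. on every
// 4th non-speculative acquire attempt. Each further failure shifts in another
// 1 bit, halving the speculation rate, until max_badness caps the mask.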
2260 
2261 // Attempt to acquire only the speculative lock.
2262 // Does not back off to the non-speculative lock.
2263 //
2264 static int
2265 __kmp_test_adaptive_lock_only( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2266 {
2267  int retries = lck->lk.adaptive.max_soft_retries;
2268 
2269  // We don't explicitly count the start of speculation; rather, we record
2270  // the results (success, hard fail, soft fail). The sum of all of those
2271  // is the total number of times we started speculation, since every
2272  // speculation must end in one of those ways.
2273  do
2274  {
2275  kmp_uint32 status = _xbegin();
2276  // Switch this in to disable actual speculation but exercise
2277  // at least some of the rest of the code. Useful for debugging...
2278  // kmp_uint32 status = _XABORT_NESTED;
2279 
2280  if (status == _XBEGIN_STARTED )
2281  { /* We have successfully started speculation
2282  * Check that no-one acquired the lock for real between when we last looked
2283  * and now. This also gets the lock cache line into our read-set,
2284  * which we need so that we'll abort if anyone later claims it for real.
2285  */
2286  if (! __kmp_is_unlocked_queuing_lock( lck ) )
2287  {
2288  // Lock is now visibly acquired, so someone beat us to it.
2289  // Abort the transaction so we'll restart from _xbegin with the
2290  // failure status.
2291  _xabort(0x01)
2292  KMP_ASSERT2( 0, "should not get here" );
2293  }
2294  return 1; // Lock has been acquired (speculatively)
2295  } else {
2296  // We have aborted, update the statistics
2297  if ( status & SOFT_ABORT_MASK)
2298  {
2299  KMP_INC_STAT(lck,softFailedSpeculations);
2300  // and loop round to retry.
2301  }
2302  else
2303  {
2304  KMP_INC_STAT(lck,hardFailedSpeculations);
2305  // Give up if we had a hard failure.
2306  break;
2307  }
2308  }
2309  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2310 
2311  // Either we had a hard failure or we didn't succeed softly after
2312  // the full set of attempts, so back off the badness.
2313  __kmp_step_badness( lck );
2314  return 0;
2315 }
2316 
2317 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2318 // if the speculative lock cannot be acquired.
2319 // We can succeed speculatively, non-speculatively, or fail.
2320 static int
2321 __kmp_test_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2322 {
2323  // First try to acquire the lock speculatively
2324  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2325  return 1;
2326 
2327  // Speculative acquisition failed, so try to acquire it non-speculatively.
2328  // Count the non-speculative acquire attempt
2329  lck->lk.adaptive.acquire_attempts++;
2330 
2331  // Use base, non-speculative lock.
2332  if ( __kmp_test_queuing_lock( lck, gtid ) )
2333  {
2334  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2335  return 1; // Lock is acquired (non-speculatively)
2336  }
2337  else
2338  {
2339  return 0; // Failed to acquire the lock, it's already visibly locked.
2340  }
2341 }
2342 
2343 static int
2344 __kmp_test_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2345 {
2346  if ( __kmp_env_consistency_check ) {
2347  char const * const func = "omp_test_lock";
2348  if ( lck->lk.initialized != lck ) {
2349  KMP_FATAL( LockIsUninitialized, func );
2350  }
2351  }
2352 
2353  int retval = __kmp_test_adaptive_lock( lck, gtid );
2354 
2355  if ( __kmp_env_consistency_check && retval ) {
2356  lck->lk.owner_id = gtid + 1;
2357  }
2358  return retval;
2359 }
2360 
2361 // Block until we can acquire a speculative, adaptive lock.
2362 // We check whether we should be trying to speculate.
2363 // If we should be, we check the real lock to see if it is free,
2364 // and, if not, pause without attempting to acquire it until it is.
2365 // Then we try the speculative acquire.
2366 // This means that although we suffer from lemmings a little (
2367 // because we can't acquire the lock speculatively until
2368 // the queue of waiting threads has cleared), we don't get into a
2369 // state where we can never acquire the lock speculatively (because we
2370 // force the queue to clear by preventing new arrivals from entering the
2371 // queue).
2372 // This does mean that when we're trying to break lemmings, the lock
2373 // is no longer fair. However, OpenMP makes no guarantee that its
2374 // locks are fair, so this isn't a real problem.
2375 static void
2376 __kmp_acquire_adaptive_lock( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2377 {
2378  if ( __kmp_should_speculate( lck, gtid ) )
2379  {
2380  if ( __kmp_is_unlocked_queuing_lock( lck ) )
2381  {
2382  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2383  return;
2384  // We tried speculation and failed, so give up.
2385  }
2386  else
2387  {
2388  // We can't try speculation until the lock is free, so we
2389  // pause here (without suspending on the queueing lock,
2390  // to allow it to drain, then try again.
2391  // All other threads will also see the same result for
2392  // shouldSpeculate, so will be doing the same if they
2393  // try to claim the lock from now on.
2394  while ( ! __kmp_is_unlocked_queuing_lock( lck ) )
2395  {
2396  KMP_INC_STAT(lck,lemmingYields);
2397  __kmp_yield (TRUE);
2398  }
2399 
2400  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2401  return;
2402  }
2403  }
2404 
2405  // Speculative acquisition failed, so acquire it non-speculatively.
2406  // Count the non-speculative acquire attempt
2407  lck->lk.adaptive.acquire_attempts++;
2408 
2409  __kmp_acquire_queuing_lock_timed_template<FALSE>( lck, gtid );
2410  // We have acquired the base lock, so count that.
2411  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2412 }
2413 
2414 static void
2415 __kmp_acquire_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2416 {
2417  if ( __kmp_env_consistency_check ) {
2418  char const * const func = "omp_set_lock";
2419  if ( lck->lk.initialized != lck ) {
2420  KMP_FATAL( LockIsUninitialized, func );
2421  }
2422  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
2423  KMP_FATAL( LockIsAlreadyOwned, func );
2424  }
2425  }
2426 
2427  __kmp_acquire_adaptive_lock( lck, gtid );
2428 
2429  if ( __kmp_env_consistency_check ) {
2430  lck->lk.owner_id = gtid + 1;
2431  }
2432 }
2433 
2434 static void
2435 __kmp_release_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2436 {
2437  if ( __kmp_is_unlocked_queuing_lock( lck ) )
2438  { // If the lock doesn't look claimed we must be speculating.
2439  // (Or the user's code is buggy and they're releasing without locking;
2440  // if we had XTEST we'd be able to check that case...)
2441  _xend(); // Exit speculation
2442  __kmp_update_badness_after_success( lck );
2443  }
2444  else
2445  { // Since the lock *is* visibly locked we're not speculating,
2446  // so should use the underlying lock's release scheme.
2447  __kmp_release_queuing_lock( lck, gtid );
2448  }
2449 }
2450 
2451 static void
2452 __kmp_release_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2453 {
2454  if ( __kmp_env_consistency_check ) {
2455  char const * const func = "omp_unset_lock";
2456  KMP_MB(); /* in case another processor initialized lock */
2457  if ( lck->lk.initialized != lck ) {
2458  KMP_FATAL( LockIsUninitialized, func );
2459  }
2460  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
2461  KMP_FATAL( LockUnsettingFree, func );
2462  }
2463  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
2464  KMP_FATAL( LockUnsettingSetByAnother, func );
2465  }
2466  lck->lk.owner_id = 0;
2467  }
2468  __kmp_release_adaptive_lock( lck, gtid );
2469 }
2470 
2471 static void
2472 __kmp_init_adaptive_lock( kmp_queuing_lock_t *lck )
2473 {
2474  __kmp_init_queuing_lock( lck );
2475  lck->lk.adaptive.badness = 0;
2476  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2477  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2478  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2479 #if KMP_DEBUG_ADAPTIVE_LOCKS
2480  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2481 #endif
2482  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2483 }
2484 
2485 static void
2486 __kmp_init_adaptive_lock_with_checks( kmp_queuing_lock_t * lck )
2487 {
2488  __kmp_init_adaptive_lock( lck );
2489 }
2490 
2491 static void
2492 __kmp_destroy_adaptive_lock( kmp_queuing_lock_t *lck )
2493 {
2494 #if KMP_DEBUG_ADAPTIVE_LOCKS
2495  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2496 #endif
2497  __kmp_destroy_queuing_lock (lck);
2498  // Nothing needed for the speculative part.
2499 }
2500 
2501 static void
2502 __kmp_destroy_adaptive_lock_with_checks( kmp_queuing_lock_t *lck )
2503 {
2504  if ( __kmp_env_consistency_check ) {
2505  char const * const func = "omp_destroy_lock";
2506  if ( lck->lk.initialized != lck ) {
2507  KMP_FATAL( LockIsUninitialized, func );
2508  }
2509  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
2510  KMP_FATAL( LockStillOwned, func );
2511  }
2512  }
2513  __kmp_destroy_adaptive_lock( lck );
2514 }
2515 
2516 
2517 #endif // KMP_USE_ADAPTIVE_LOCKS
2518 
2519 
2520 /* ------------------------------------------------------------------------ */
2521 /* DRDPA ticket locks */
2522 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2523 
2524 static kmp_int32
2525 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2526 {
2527  return TCR_4( lck->lk.owner_id ) - 1;
2528 }
2529 
2530 static inline bool
2531 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2532 {
2533  return lck->lk.depth_locked != -1;
2534 }
2535 
2536 __forceinline static void
2537 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2538 {
2539  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2540  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2541  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2542  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2543  TCR_PTR(lck->lk.polls); // volatile load
2544 
2545 #ifdef USE_LOCK_PROFILE
2546  if (TCR_8(polls[ticket & mask].poll) != ticket)
2547  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2548  /* else __kmp_printf( "." );*/
2549 #endif /* USE_LOCK_PROFILE */
2550 
2551  //
2552  // Now spin-wait, but reload the polls pointer and mask, in case the
2553  // polling area has been reconfigured. Unless it is reconfigured, the
2554  // reloads stay in L1 cache and are cheap.
2555  //
2556  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2557  //
2558  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2559  // and poll to be re-read every spin iteration.
2560  //
2561  kmp_uint32 spins;
2562 
2563  KMP_FSYNC_PREPARE(lck);
2564  KMP_INIT_YIELD(spins);
2565  while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2566  __kmp_static_delay(TRUE);
2567 
2568  //
2569  // If we are oversubscribed,
2570  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2571  // CPU Pause is in the macros for yield.
2572  //
2573  KMP_YIELD(TCR_4(__kmp_nth)
2574  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2575  KMP_YIELD_SPIN(spins);
2576 
2577  // Re-read the mask and the poll pointer from the lock structure.
2578  //
2579  // Make certain that "mask" is read before "polls" !!!
2580  //
2581  // If another thread reconfigures the polling area and updates its
2582  // values, and we get the new value of mask with the old polls
2583  // pointer, we could access memory beyond the end of the old polling
2584  // area.
2585  //
2586  mask = TCR_8(lck->lk.mask); // volatile load
2587  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2588  TCR_PTR(lck->lk.polls); // volatile load
2589  }
2590 
2591  //
2592  // Critical section starts here
2593  //
2594  KMP_FSYNC_ACQUIRED(lck);
2595  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2596  ticket, lck));
2597  lck->lk.now_serving = ticket; // non-volatile store
2598 
2599  //
2600  // Deallocate a garbage polling area if we know that we are the last
2601  // thread that could possibly access it.
2602  //
2603  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2604  // ticket.
2605  //
2606  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2607  __kmp_free((void *)lck->lk.old_polls);
2608  lck->lk.old_polls = NULL;
2609  lck->lk.cleanup_ticket = 0;
2610  }
2611 
2612  //
2613  // Check to see if we should reconfigure the polling area.
2614  // If there is still a garbage polling area to be deallocated from a
2615  // previous reconfiguration, let a later thread reconfigure it.
2616  //
2617  if (lck->lk.old_polls == NULL) {
2618  bool reconfigure = false;
2619  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2620  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2621 
2622  if (TCR_4(__kmp_nth)
2623  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2624  //
2625  // We are in oversubscription mode. Contract the polling area
2626  // down to a single location, if that hasn't been done already.
2627  //
2628  if (num_polls > 1) {
2629  reconfigure = true;
2630  num_polls = TCR_4(lck->lk.num_polls);
2631  mask = 0;
2632  num_polls = 1;
2633  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2634  __kmp_allocate(num_polls * sizeof(*polls));
2635  polls[0].poll = ticket;
2636  }
2637  }
2638  else {
2639  //
2640  // We are in under/fully subscribed mode. Check the number of
2641  // threads waiting on the lock. The size of the polling area
2642  // should be at least the number of threads waiting.
2643  //
2644  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2645  if (num_waiting > num_polls) {
2646  kmp_uint32 old_num_polls = num_polls;
2647  reconfigure = true;
2648  do {
2649  mask = (mask << 1) | 1;
2650  num_polls *= 2;
2651  } while (num_polls <= num_waiting);
2652 
2653  //
2654  // Allocate the new polling area, and copy the relevant portion
2655  // of the old polling area to the new area. __kmp_allocate()
2656  // zeroes the memory it allocates, and most of the old area is
2657  // just zero padding, so we only copy the release counters.
2658  //
2659  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2660  __kmp_allocate(num_polls * sizeof(*polls));
2661  kmp_uint32 i;
2662  for (i = 0; i < old_num_polls; i++) {
2663  polls[i].poll = old_polls[i].poll;
2664  }
2665  }
2666  }
2667 
2668  if (reconfigure) {
2669  //
2670  // Now write the updated fields back to the lock structure.
2671  //
2672  // Make certain that "polls" is written before "mask" !!!
2673  //
2674  // If another thread picks up the new value of mask and the old
2675  // polls pointer, it could access memory beyond the end of the
2676  // old polling area.
2677  //
2678  // On x86, we need memory fences.
2679  //
2680  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2681  ticket, lck, num_polls));
2682 
2683  lck->lk.old_polls = old_polls; // non-volatile store
2684  lck->lk.polls = polls; // volatile store
2685 
2686  KMP_MB();
2687 
2688  lck->lk.num_polls = num_polls; // non-volatile store
2689  lck->lk.mask = mask; // volatile store
2690 
2691  KMP_MB();
2692 
2693  //
2694  // Only after the new polling area and mask have been flushed
2695  // to main memory can we update the cleanup ticket field.
2696  //
2697  // volatile load / non-volatile store
2698  //
2699  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2700  }
2701  }
2702 }
2703 
2704 void
2705 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2706 {
2707  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2708 }
2709 
2710 static void
2711 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2712 {
2713  if ( __kmp_env_consistency_check ) {
2714  char const * const func = "omp_set_lock";
2715  if ( lck->lk.initialized != lck ) {
2716  KMP_FATAL( LockIsUninitialized, func );
2717  }
2718  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2719  KMP_FATAL( LockNestableUsedAsSimple, func );
2720  }
2721  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2722  KMP_FATAL( LockIsAlreadyOwned, func );
2723  }
2724  }
2725 
2726  __kmp_acquire_drdpa_lock( lck, gtid );
2727 
2728  if ( __kmp_env_consistency_check ) {
2729  lck->lk.owner_id = gtid + 1;
2730  }
2731 }
2732 
2733 int
2734 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2735 {
2736  //
2737  // First get a ticket, then read the polls pointer and the mask.
2738  // The polls pointer must be read before the mask!!! (See above)
2739  //
2740  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2741  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2742  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2743  TCR_PTR(lck->lk.polls); // volatile load
2744  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2745  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2746  kmp_uint64 next_ticket = ticket + 1;
2747  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2748  ticket, next_ticket)) {
2749  KMP_FSYNC_ACQUIRED(lck);
2750  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2751  ticket, lck));
2752  lck->lk.now_serving = ticket; // non-volatile store
2753 
2754  //
2755  // Since no threads are waiting, there is no possibility that
2756  // we would want to reconfigure the polling area. We might
2757  // have the cleanup ticket value (which says that it is now
2758  // safe to deallocate old_polls), but we'll let a later thread
2759  // which calls __kmp_acquire_lock do that - this routine
2760  // isn't supposed to block, and we would risk blocks if we
2761  // called __kmp_free() to do the deallocation.
2762  //
2763  return TRUE;
2764  }
2765  }
2766  return FALSE;
2767 }
2768 
2769 static int
2770 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2771 {
2772  if ( __kmp_env_consistency_check ) {
2773  char const * const func = "omp_test_lock";
2774  if ( lck->lk.initialized != lck ) {
2775  KMP_FATAL( LockIsUninitialized, func );
2776  }
2777  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2778  KMP_FATAL( LockNestableUsedAsSimple, func );
2779  }
2780  }
2781 
2782  int retval = __kmp_test_drdpa_lock( lck, gtid );
2783 
2784  if ( __kmp_env_consistency_check && retval ) {
2785  lck->lk.owner_id = gtid + 1;
2786  }
2787  return retval;
2788 }
2789 
2790 void
2791 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2792 {
2793  //
2794  // Read the ticket value from the lock data struct, then the polls
2795  // pointer and the mask. The polls pointer must be read before the
2796  // mask!!! (See above)
2797  //
2798  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2799  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2800  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2801  TCR_PTR(lck->lk.polls); // volatile load
2802  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2803  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2804  ticket - 1, lck));
2805  KMP_FSYNC_RELEASING(lck);
2806  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2807 }
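
// Worked example (illustrative): if the releasing thread's now_serving is t,
// it stores t+1 into polls[(t+1) & mask], which is exactly the slot the
// thread holding ticket t+1 is spinning on in the acquire path above.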
2808 
2809 static void
2810 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2811 {
2812  if ( __kmp_env_consistency_check ) {
2813  char const * const func = "omp_unset_lock";
2814  KMP_MB(); /* in case another processor initialized lock */
2815  if ( lck->lk.initialized != lck ) {
2816  KMP_FATAL( LockIsUninitialized, func );
2817  }
2818  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2819  KMP_FATAL( LockNestableUsedAsSimple, func );
2820  }
2821  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2822  KMP_FATAL( LockUnsettingFree, func );
2823  }
2824  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2825  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2826  KMP_FATAL( LockUnsettingSetByAnother, func );
2827  }
2828  lck->lk.owner_id = 0;
2829  }
2830  __kmp_release_drdpa_lock( lck, gtid );
2831 }
2832 
2833 void
2834 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2835 {
2836  lck->lk.location = NULL;
2837  lck->lk.mask = 0;
2838  lck->lk.num_polls = 1;
2839  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2840  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2841  lck->lk.cleanup_ticket = 0;
2842  lck->lk.old_polls = NULL;
2843  lck->lk.next_ticket = 0;
2844  lck->lk.now_serving = 0;
2845  lck->lk.owner_id = 0; // no thread owns the lock.
2846  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2847  lck->lk.initialized = lck;
2848 
2849  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2850 }
2851 
2852 static void
2853 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2854 {
2855  __kmp_init_drdpa_lock( lck );
2856 }
2857 
2858 void
2859 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2860 {
2861  lck->lk.initialized = NULL;
2862  lck->lk.location = NULL;
2863  if (lck->lk.polls != NULL) {
2864  __kmp_free((void *)lck->lk.polls);
2865  lck->lk.polls = NULL;
2866  }
2867  if (lck->lk.old_polls != NULL) {
2868  __kmp_free((void *)lck->lk.old_polls);
2869  lck->lk.old_polls = NULL;
2870  }
2871  lck->lk.mask = 0;
2872  lck->lk.num_polls = 0;
2873  lck->lk.cleanup_ticket = 0;
2874  lck->lk.next_ticket = 0;
2875  lck->lk.now_serving = 0;
2876  lck->lk.owner_id = 0;
2877  lck->lk.depth_locked = -1;
2878 }
2879 
2880 static void
2881 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2882 {
2883  if ( __kmp_env_consistency_check ) {
2884  char const * const func = "omp_destroy_lock";
2885  if ( lck->lk.initialized != lck ) {
2886  KMP_FATAL( LockIsUninitialized, func );
2887  }
2888  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2889  KMP_FATAL( LockNestableUsedAsSimple, func );
2890  }
2891  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2892  KMP_FATAL( LockStillOwned, func );
2893  }
2894  }
2895  __kmp_destroy_drdpa_lock( lck );
2896 }
2897 
2898 
2899 //
2900 // nested drdpa ticket locks
2901 //
2902 
2903 void
2904 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2905 {
2906  KMP_DEBUG_ASSERT( gtid >= 0 );
2907 
2908  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2909  lck->lk.depth_locked += 1;
2910  }
2911  else {
2912  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2913  KMP_MB();
2914  lck->lk.depth_locked = 1;
2915  KMP_MB();
2916  lck->lk.owner_id = gtid + 1;
2917  }
2918 }
2919 
2920 static void
2921 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2922 {
2923  if ( __kmp_env_consistency_check ) {
2924  char const * const func = "omp_set_nest_lock";
2925  if ( lck->lk.initialized != lck ) {
2926  KMP_FATAL( LockIsUninitialized, func );
2927  }
2928  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2929  KMP_FATAL( LockSimpleUsedAsNestable, func );
2930  }
2931  }
2932  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2933 }
2934 
2935 int
2936 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2937 {
2938  int retval;
2939 
2940  KMP_DEBUG_ASSERT( gtid >= 0 );
2941 
2942  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2943  retval = ++lck->lk.depth_locked;
2944  }
2945  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2946  retval = 0;
2947  }
2948  else {
2949  KMP_MB();
2950  retval = lck->lk.depth_locked = 1;
2951  KMP_MB();
2952  lck->lk.owner_id = gtid + 1;
2953  }
2954  return retval;
2955 }
2956 
2957 static int
2958 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2959 {
2960  if ( __kmp_env_consistency_check ) {
2961  char const * const func = "omp_test_nest_lock";
2962  if ( lck->lk.initialized != lck ) {
2963  KMP_FATAL( LockIsUninitialized, func );
2964  }
2965  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2966  KMP_FATAL( LockSimpleUsedAsNestable, func );
2967  }
2968  }
2969  return __kmp_test_nested_drdpa_lock( lck, gtid );
2970 }
2971 
2972 void
2973 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2974 {
2975  KMP_DEBUG_ASSERT( gtid >= 0 );
2976 
2977  KMP_MB();
2978  if ( --(lck->lk.depth_locked) == 0 ) {
2979  KMP_MB();
2980  lck->lk.owner_id = 0;
2981  __kmp_release_drdpa_lock( lck, gtid );
2982  }
2983 }
2984 
2985 static void
2986 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2987 {
2988  if ( __kmp_env_consistency_check ) {
2989  char const * const func = "omp_unset_nest_lock";
2990  KMP_MB(); /* in case another processor initialized lock */
2991  if ( lck->lk.initialized != lck ) {
2992  KMP_FATAL( LockIsUninitialized, func );
2993  }
2994  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2995  KMP_FATAL( LockSimpleUsedAsNestable, func );
2996  }
2997  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2998  KMP_FATAL( LockUnsettingFree, func );
2999  }
3000  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
3001  KMP_FATAL( LockUnsettingSetByAnother, func );
3002  }
3003  }
3004  __kmp_release_nested_drdpa_lock( lck, gtid );
3005 }
3006 
3007 void
3008 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
3009 {
3010  __kmp_init_drdpa_lock( lck );
3011  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
3012 }
3013 
3014 static void
3015 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
3016 {
3017  __kmp_init_nested_drdpa_lock( lck );
3018 }
3019 
3020 void
3021 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
3022 {
3023  __kmp_destroy_drdpa_lock( lck );
3024  lck->lk.depth_locked = 0;
3025 }
3026 
3027 static void
3028 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
3029 {
3030  if ( __kmp_env_consistency_check ) {
3031  char const * const func = "omp_destroy_nest_lock";
3032  if ( lck->lk.initialized != lck ) {
3033  KMP_FATAL( LockIsUninitialized, func );
3034  }
3035  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3036  KMP_FATAL( LockSimpleUsedAsNestable, func );
3037  }
3038  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3039  KMP_FATAL( LockStillOwned, func );
3040  }
3041  }
3042  __kmp_destroy_nested_drdpa_lock( lck );
3043 }
3044 
3045 
3046 //
3047 // access functions to fields which don't exist for all lock kinds.
3048 //
3049 
3050 static int
3051 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3052 {
3053  return lck == lck->lk.initialized;
3054 }
3055 
3056 static const ident_t *
3057 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3058 {
3059  return lck->lk.location;
3060 }
3061 
3062 static void
3063 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3064 {
3065  lck->lk.location = loc;
3066 }
3067 
3068 static kmp_lock_flags_t
3069 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3070 {
3071  return lck->lk.flags;
3072 }
3073 
3074 static void
3075 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3076 {
3077  lck->lk.flags = flags;
3078 }
3079 
3080 /* ------------------------------------------------------------------------ */
3081 /* user locks
3082  *
3083  * They are implemented as a table of function pointers which are set to the
3084  * lock functions of the appropriate kind, once that has been determined.
3085  */
3086 
3087 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3088 
3089 size_t __kmp_base_user_lock_size = 0;
3090 size_t __kmp_user_lock_size = 0;
3091 
3092 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3093 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3094 
3095 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3096 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3097 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3098 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3099 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3100 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3101 
3102 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3103 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3104 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3105 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3106 
3107 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3108 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3109 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3110 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3111 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
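
/*
   Illustrative sketch (not from this file): once __kmp_set_user_lock_vptrs()
   below has filled in the pointers above, callers dispatch through them
   roughly as follows. The wrapper name is hypothetical; the real inline
   wrappers live elsewhere in the library.
*/
#if 0
static void
example_acquire_user_lock( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
    ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );   // indirect call
}
#endif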
3112 
3113 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3114 {
3115  switch ( user_lock_kind ) {
3116  case lk_default:
3117  default:
3118  KMP_ASSERT( 0 );
3119 
3120  case lk_tas: {
3121  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3122  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3123 
3124  __kmp_get_user_lock_owner_ =
3125  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3126  ( &__kmp_get_tas_lock_owner );
3127 
3128  __kmp_acquire_user_lock_with_checks_ =
3129  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3130  ( &__kmp_acquire_tas_lock_with_checks );
3131 
3132  __kmp_test_user_lock_with_checks_ =
3133  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3134  ( &__kmp_test_tas_lock_with_checks );
3135 
3136  __kmp_release_user_lock_with_checks_ =
3137  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3138  ( &__kmp_release_tas_lock_with_checks );
3139 
3140  __kmp_init_user_lock_with_checks_ =
3141  ( void ( * )( kmp_user_lock_p ) )
3142  ( &__kmp_init_tas_lock_with_checks );
3143 
3144  __kmp_destroy_user_lock_ =
3145  ( void ( * )( kmp_user_lock_p ) )
3146  ( &__kmp_destroy_tas_lock );
3147 
3148  __kmp_destroy_user_lock_with_checks_ =
3149  ( void ( * )( kmp_user_lock_p ) )
3150  ( &__kmp_destroy_tas_lock_with_checks );
3151 
3152  __kmp_acquire_nested_user_lock_with_checks_ =
3153  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3154  ( &__kmp_acquire_nested_tas_lock_with_checks );
3155 
3156  __kmp_test_nested_user_lock_with_checks_ =
3157  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3158  ( &__kmp_test_nested_tas_lock_with_checks );
3159 
3160  __kmp_release_nested_user_lock_with_checks_ =
3161  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3162  ( &__kmp_release_nested_tas_lock_with_checks );
3163 
3164  __kmp_init_nested_user_lock_with_checks_ =
3165  ( void ( * )( kmp_user_lock_p ) )
3166  ( &__kmp_init_nested_tas_lock_with_checks );
3167 
3168  __kmp_destroy_nested_user_lock_with_checks_ =
3169  ( void ( * )( kmp_user_lock_p ) )
3170  ( &__kmp_destroy_nested_tas_lock_with_checks );
3171 
3172  __kmp_is_user_lock_initialized_ =
3173  ( int ( * )( kmp_user_lock_p ) ) NULL;
3174 
3175  __kmp_get_user_lock_location_ =
3176  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3177 
3178  __kmp_set_user_lock_location_ =
3179  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3180 
3181  __kmp_get_user_lock_flags_ =
3182  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3183 
3184  __kmp_set_user_lock_flags_ =
3185  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3186  }
3187  break;
3188 
3189 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3190 
3191  case lk_futex: {
3192  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3193  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3194 
3195  __kmp_get_user_lock_owner_ =
3196  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3197  ( &__kmp_get_futex_lock_owner );
3198 
3199  __kmp_acquire_user_lock_with_checks_ =
3200  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3201  ( &__kmp_acquire_futex_lock_with_checks );
3202 
3203  __kmp_test_user_lock_with_checks_ =
3204  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3205  ( &__kmp_test_futex_lock_with_checks );
3206 
3207  __kmp_release_user_lock_with_checks_ =
3208  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3209  ( &__kmp_release_futex_lock_with_checks );
3210 
3211  __kmp_init_user_lock_with_checks_ =
3212  ( void ( * )( kmp_user_lock_p ) )
3213  ( &__kmp_init_futex_lock_with_checks );
3214 
3215  __kmp_destroy_user_lock_ =
3216  ( void ( * )( kmp_user_lock_p ) )
3217  ( &__kmp_destroy_futex_lock );
3218 
3219  __kmp_destroy_user_lock_with_checks_ =
3220  ( void ( * )( kmp_user_lock_p ) )
3221  ( &__kmp_destroy_futex_lock_with_checks );
3222 
3223  __kmp_acquire_nested_user_lock_with_checks_ =
3224  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3225  ( &__kmp_acquire_nested_futex_lock_with_checks );
3226 
3227  __kmp_test_nested_user_lock_with_checks_ =
3228  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3229  ( &__kmp_test_nested_futex_lock_with_checks );
3230 
3231  __kmp_release_nested_user_lock_with_checks_ =
3232  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3233  ( &__kmp_release_nested_futex_lock_with_checks );
3234 
3235  __kmp_init_nested_user_lock_with_checks_ =
3236  ( void ( * )( kmp_user_lock_p ) )
3237  ( &__kmp_init_nested_futex_lock_with_checks );
3238 
3239  __kmp_destroy_nested_user_lock_with_checks_ =
3240  ( void ( * )( kmp_user_lock_p ) )
3241  ( &__kmp_destroy_nested_futex_lock_with_checks );
3242 
3243  __kmp_is_user_lock_initialized_ =
3244  ( int ( * )( kmp_user_lock_p ) ) NULL;
3245 
3246  __kmp_get_user_lock_location_ =
3247  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3248 
3249  __kmp_set_user_lock_location_ =
3250  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3251 
3252  __kmp_get_user_lock_flags_ =
3253  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3254 
3255  __kmp_set_user_lock_flags_ =
3256  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3257  }
3258  break;
3259 
3260 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3261 
3262  case lk_ticket: {
3263  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3264  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3265 
3266  __kmp_get_user_lock_owner_ =
3267  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3268  ( &__kmp_get_ticket_lock_owner );
3269 
3270  __kmp_acquire_user_lock_with_checks_ =
3271  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3272  ( &__kmp_acquire_ticket_lock_with_checks );
3273 
3274  __kmp_test_user_lock_with_checks_ =
3275  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3276  ( &__kmp_test_ticket_lock_with_checks );
3277 
3278  __kmp_release_user_lock_with_checks_ =
3279  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3280  ( &__kmp_release_ticket_lock_with_checks );
3281 
3282  __kmp_init_user_lock_with_checks_ =
3283  ( void ( * )( kmp_user_lock_p ) )
3284  ( &__kmp_init_ticket_lock_with_checks );
3285 
3286  __kmp_destroy_user_lock_ =
3287  ( void ( * )( kmp_user_lock_p ) )
3288  ( &__kmp_destroy_ticket_lock );
3289 
3290  __kmp_destroy_user_lock_with_checks_ =
3291  ( void ( * )( kmp_user_lock_p ) )
3292  ( &__kmp_destroy_ticket_lock_with_checks );
3293 
3294  __kmp_acquire_nested_user_lock_with_checks_ =
3295  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3296  ( &__kmp_acquire_nested_ticket_lock_with_checks );
3297 
3298  __kmp_test_nested_user_lock_with_checks_ =
3299  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3300  ( &__kmp_test_nested_ticket_lock_with_checks );
3301 
3302  __kmp_release_nested_user_lock_with_checks_ =
3303  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3304  ( &__kmp_release_nested_ticket_lock_with_checks );
3305 
3306  __kmp_init_nested_user_lock_with_checks_ =
3307  ( void ( * )( kmp_user_lock_p ) )
3308  ( &__kmp_init_nested_ticket_lock_with_checks );
3309 
3310  __kmp_destroy_nested_user_lock_with_checks_ =
3311  ( void ( * )( kmp_user_lock_p ) )
3312  ( &__kmp_destroy_nested_ticket_lock_with_checks );
3313 
3314  __kmp_is_user_lock_initialized_ =
3315  ( int ( * )( kmp_user_lock_p ) )
3316  ( &__kmp_is_ticket_lock_initialized );
3317 
3318  __kmp_get_user_lock_location_ =
3319  ( const ident_t * ( * )( kmp_user_lock_p ) )
3320  ( &__kmp_get_ticket_lock_location );
3321 
3322  __kmp_set_user_lock_location_ =
3323  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3324  ( &__kmp_set_ticket_lock_location );
3325 
3326  __kmp_get_user_lock_flags_ =
3327  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3328  ( &__kmp_get_ticket_lock_flags );
3329 
3330  __kmp_set_user_lock_flags_ =
3331  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3332  ( &__kmp_set_ticket_lock_flags );
3333  }
3334  break;
3335 
3336  case lk_queuing: {
3337  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3338  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3339 
3340  __kmp_get_user_lock_owner_ =
3341  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3342  ( &__kmp_get_queuing_lock_owner );
3343 
3344  __kmp_acquire_user_lock_with_checks_ =
3345  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3346  ( &__kmp_acquire_queuing_lock_with_checks );
3347 
3348  __kmp_test_user_lock_with_checks_ =
3349  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3350  ( &__kmp_test_queuing_lock_with_checks );
3351 
3352  __kmp_release_user_lock_with_checks_ =
3353  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3354  ( &__kmp_release_queuing_lock_with_checks );
3355 
3356  __kmp_init_user_lock_with_checks_ =
3357  ( void ( * )( kmp_user_lock_p ) )
3358  ( &__kmp_init_queuing_lock_with_checks );
3359 
3360  __kmp_destroy_user_lock_ =
3361  ( void ( * )( kmp_user_lock_p ) )
3362  ( &__kmp_destroy_queuing_lock );
3363 
3364  __kmp_destroy_user_lock_with_checks_ =
3365  ( void ( * )( kmp_user_lock_p ) )
3366  ( &__kmp_destroy_queuing_lock_with_checks );
3367 
3368  __kmp_acquire_nested_user_lock_with_checks_ =
3369  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3370  ( &__kmp_acquire_nested_queuing_lock_with_checks );
3371 
3372  __kmp_test_nested_user_lock_with_checks_ =
3373  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3374  ( &__kmp_test_nested_queuing_lock_with_checks );
3375 
3376  __kmp_release_nested_user_lock_with_checks_ =
3377  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3378  ( &__kmp_release_nested_queuing_lock_with_checks );
3379 
3380  __kmp_init_nested_user_lock_with_checks_ =
3381  ( void ( * )( kmp_user_lock_p ) )
3382  ( &__kmp_init_nested_queuing_lock_with_checks );
3383 
3384  __kmp_destroy_nested_user_lock_with_checks_ =
3385  ( void ( * )( kmp_user_lock_p ) )
3386  ( &__kmp_destroy_nested_queuing_lock_with_checks );
3387 
3388  __kmp_is_user_lock_initialized_ =
3389  ( int ( * )( kmp_user_lock_p ) )
3390  ( &__kmp_is_queuing_lock_initialized );
3391 
3392  __kmp_get_user_lock_location_ =
3393  ( const ident_t * ( * )( kmp_user_lock_p ) )
3394  ( &__kmp_get_queuing_lock_location );
3395 
3396  __kmp_set_user_lock_location_ =
3397  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3398  ( &__kmp_set_queuing_lock_location );
3399 
3400  __kmp_get_user_lock_flags_ =
3401  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3402  ( &__kmp_get_queuing_lock_flags );
3403 
3404  __kmp_set_user_lock_flags_ =
3405  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3406  ( &__kmp_set_queuing_lock_flags );
3407  }
3408  break;
3409 
3410 #if KMP_USE_ADAPTIVE_LOCKS
3411  case lk_adaptive: {
3412  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3413  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3414 
3415  __kmp_get_user_lock_owner_ =
3416  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3417  ( &__kmp_get_queuing_lock_owner );
3418 
3419  __kmp_acquire_user_lock_with_checks_ =
3420  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3421  ( &__kmp_acquire_adaptive_lock_with_checks );
3422 
3423  __kmp_test_user_lock_with_checks_ =
3424  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3425  ( &__kmp_test_adaptive_lock_with_checks );
3426 
3427  __kmp_release_user_lock_with_checks_ =
3428  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3429  ( &__kmp_release_adaptive_lock_with_checks );
3430 
3431  __kmp_init_user_lock_with_checks_ =
3432  ( void ( * )( kmp_user_lock_p ) )
3433  ( &__kmp_init_adaptive_lock_with_checks );
3434 
3435  __kmp_destroy_user_lock_with_checks_ =
3436  ( void ( * )( kmp_user_lock_p ) )
3437  ( &__kmp_destroy_adaptive_lock_with_checks );
3438 
3439  __kmp_destroy_user_lock_ =
3440  ( void ( * )( kmp_user_lock_p ) )
3441  ( &__kmp_destroy_adaptive_lock );
3442 
3443  __kmp_is_user_lock_initialized_ =
3444  ( int ( * )( kmp_user_lock_p ) )
3445  ( &__kmp_is_queuing_lock_initialized );
3446 
3447  __kmp_get_user_lock_location_ =
3448  ( const ident_t * ( * )( kmp_user_lock_p ) )
3449  ( &__kmp_get_queuing_lock_location );
3450 
3451  __kmp_set_user_lock_location_ =
3452  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3453  ( &__kmp_set_queuing_lock_location );
3454 
3455  __kmp_get_user_lock_flags_ =
3456  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3457  ( &__kmp_get_queuing_lock_flags );
3458 
3459  __kmp_set_user_lock_flags_ =
3460  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3461  ( &__kmp_set_queuing_lock_flags );
3462 
3463  }
3464  break;
3465 #endif // KMP_USE_ADAPTIVE_LOCKS
3466 
3467  case lk_drdpa: {
3468  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3469  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3470 
3471  __kmp_get_user_lock_owner_ =
3472  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3473  ( &__kmp_get_drdpa_lock_owner );
3474 
3475  __kmp_acquire_user_lock_with_checks_ =
3476  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3477  ( &__kmp_acquire_drdpa_lock_with_checks );
3478 
3479  __kmp_test_user_lock_with_checks_ =
3480  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3481  ( &__kmp_test_drdpa_lock_with_checks );
3482 
3483  __kmp_release_user_lock_with_checks_ =
3484  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3485  ( &__kmp_release_drdpa_lock_with_checks );
3486 
3487  __kmp_init_user_lock_with_checks_ =
3488  ( void ( * )( kmp_user_lock_p ) )
3489  ( &__kmp_init_drdpa_lock_with_checks );
3490 
3491  __kmp_destroy_user_lock_ =
3492  ( void ( * )( kmp_user_lock_p ) )
3493  ( &__kmp_destroy_drdpa_lock );
3494 
3495  __kmp_destroy_user_lock_with_checks_ =
3496  ( void ( * )( kmp_user_lock_p ) )
3497  ( &__kmp_destroy_drdpa_lock_with_checks );
3498 
3499  __kmp_acquire_nested_user_lock_with_checks_ =
3500  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3501  ( &__kmp_acquire_nested_drdpa_lock_with_checks );
3502 
3503  __kmp_test_nested_user_lock_with_checks_ =
3504  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3505  ( &__kmp_test_nested_drdpa_lock_with_checks );
3506 
3507  __kmp_release_nested_user_lock_with_checks_ =
3508  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3509  ( &__kmp_release_nested_drdpa_lock_with_checks );
3510 
3511  __kmp_init_nested_user_lock_with_checks_ =
3512  ( void ( * )( kmp_user_lock_p ) )
3513  ( &__kmp_init_nested_drdpa_lock_with_checks );
3514 
3515  __kmp_destroy_nested_user_lock_with_checks_ =
3516  ( void ( * )( kmp_user_lock_p ) )
3517  ( &__kmp_destroy_nested_drdpa_lock_with_checks );
3518 
3519  __kmp_is_user_lock_initialized_ =
3520  ( int ( * )( kmp_user_lock_p ) )
3521  ( &__kmp_is_drdpa_lock_initialized );
3522 
3523  __kmp_get_user_lock_location_ =
3524  ( const ident_t * ( * )( kmp_user_lock_p ) )
3525  ( &__kmp_get_drdpa_lock_location );
3526 
3527  __kmp_set_user_lock_location_ =
3528  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3529  ( &__kmp_set_drdpa_lock_location );
3530 
3531  __kmp_get_user_lock_flags_ =
3532  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3533  ( &__kmp_get_drdpa_lock_flags );
3534 
3535  __kmp_set_user_lock_flags_ =
3536  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3537  ( &__kmp_set_drdpa_lock_flags );
3538  }
3539  break;
3540  }
3541 }
3542 
3543 
3544 // ----------------------------------------------------------------------------
3545 // User lock table & lock allocation
3546 
3547 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3548 kmp_user_lock_p __kmp_lock_pool = NULL;
3549 
3550 // Lock block-allocation support.
3551 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3552 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3553 
3554 static kmp_lock_index_t
3555 __kmp_lock_table_insert( kmp_user_lock_p lck )
3556 {
3557  // Assume that kmp_global_lock is held upon entry/exit.
3558  kmp_lock_index_t index;
3559  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3560  kmp_lock_index_t size;
3561  kmp_user_lock_p *table;
3562  kmp_lock_index_t i;
3563  // Reallocate lock table.
3564  if ( __kmp_user_lock_table.allocated == 0 ) {
3565  size = 1024;
3566  }
3567  else {
3568  size = __kmp_user_lock_table.allocated * 2;
3569  }
3570  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3571  memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3572  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3573  // We cannot free the previous table now, since it may be in use by other
3574  // threads. Instead, save the pointer to the previous table in the first element of
3575  // the new table. All the tables are chained into a list and can be freed when
3576  // the library is shutting down.
3577  __kmp_user_lock_table.table = table;
3578  __kmp_user_lock_table.allocated = size;
3579  }
3580  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3581  index = __kmp_user_lock_table.used;
3582  __kmp_user_lock_table.table[ index ] = lck;
3583  ++ __kmp_user_lock_table.used;
3584  return index;
3585 }
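// Illustrative only: because element [0] of each generation stores the
// previous table, the generations form a list that can be walked (and freed)
// like this -- which is exactly what __kmp_cleanup_user_locks() does below:
//
//     kmp_user_lock_p *t = __kmp_user_lock_table.table;     // newest generation
//     while ( t != NULL ) {
//         kmp_user_lock_p *prev = (kmp_user_lock_p *)( t[ 0 ] );  // older generation
//         __kmp_free( t );                                   // safe only at shutdown
//         t = prev;
//     }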
3586 
3587 static kmp_user_lock_p
3588 __kmp_lock_block_allocate()
3589 {
3590  // Assume that __kmp_global_lock is held upon entry/exit.
3591  static int last_index = 0;
3592  if ( ( last_index >= __kmp_num_locks_in_block )
3593  || ( __kmp_lock_blocks == NULL ) ) {
3594  // Restart the index.
3595  last_index = 0;
3596  // Need to allocate a new block.
3597  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3598  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3599  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3600  // Set up the new block.
3601  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3602  new_block->next_block = __kmp_lock_blocks;
3603  new_block->locks = (void *)buffer;
3604  // Publish the new block.
3605  KMP_MB();
3606  __kmp_lock_blocks = new_block;
3607  }
3608  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3609  [ last_index * __kmp_user_lock_size ] ) );
3610  last_index++;
3611  return ret;
3612 }
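// Memory layout of a block allocated above (one __kmp_allocate call); the
// kmp_block_of_locks descriptor sits at the tail of the same buffer, which is
// why __kmp_cleanup_user_locks() frees block_ptr->locks but never block_ptr
// itself:
//
//     buffer                                      buffer + space_for_locks
//     |                                           |
//     v                                           v
//     +---------+---------+-- ... --+-------------+----------------------+
//     | lock 0  | lock 1  |         | lock N - 1  | kmp_block_of_locks   |
//     +---------+---------+-- ... --+-------------+----------------------+
//
//     (N == __kmp_num_locks_in_block, each slot __kmp_user_lock_size bytes)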
3613 
3614 //
3615 // Get memory for a lock. It may be freshly allocated memory or reused memory
3616 // from the lock pool.
3617 //
3618 kmp_user_lock_p
3619 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3620  kmp_lock_flags_t flags )
3621 {
3622  kmp_user_lock_p lck;
3623  kmp_lock_index_t index;
3624  KMP_DEBUG_ASSERT( user_lock );
3625 
3626  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3627 
3628  if ( __kmp_lock_pool == NULL ) {
3629  // Lock pool is empty. Allocate new memory.
3630  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3631  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3632  }
3633  else {
3634  lck = __kmp_lock_block_allocate();
3635  }
3636 
3637  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3638  // and so the debugger has info on all allocated locks.
3639  index = __kmp_lock_table_insert( lck );
3640  }
3641  else {
3642  // Pick up a lock from the pool.
3643  lck = __kmp_lock_pool;
3644  index = __kmp_lock_pool->pool.index;
3645  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3646  }
3647 
3648  //
3649  // We could potentially differentiate between nested and regular locks
3650  // here, and do the lock table lookup for regular locks only.
3651  //
3652  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3653  * ( (kmp_lock_index_t *) user_lock ) = index;
3654  }
3655  else {
3656  * ( (kmp_user_lock_p *) user_lock ) = lck;
3657  }
3658 
3659  // Mark the lock if it is a critical section lock.
3660  __kmp_set_user_lock_flags( lck, flags );
3661 
3662  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3663 
3664  return lck;
3665 }
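// Hedged usage sketch (illustrative, not a function in this library): a
// typical caller pairs the allocation above with the per-type init routine
// installed in the vtable, and later recovers the lock via
// __kmp_lookup_user_lock(). Names below are for orientation only.
//
//     void example_omp_init_lock( void **user_lock, kmp_int32 gtid )
//     {
//         kmp_user_lock_p lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
//         ( *__kmp_init_user_lock_with_checks_ )( lck );
//         // *user_lock now holds either a small table index or the lock
//         // pointer itself, depending on OMP_LOCK_T_SIZE vs sizeof(void *).
//     }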
3666 
3667 // Return the lock's memory to the pool for reuse.
3668 void
3669 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3670 {
3671  kmp_lock_pool_t * lock_pool;
3672 
3673  KMP_DEBUG_ASSERT( user_lock != NULL );
3674  KMP_DEBUG_ASSERT( lck != NULL );
3675 
3676  __kmp_acquire_lock( & __kmp_global_lock, gtid );
3677 
3678  lck->pool.next = __kmp_lock_pool;
3679  __kmp_lock_pool = lck;
3680  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3681  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3682  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3683  lck->pool.index = index;
3684  }
3685 
3686  __kmp_release_lock( & __kmp_global_lock, gtid );
3687 }
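// Note (sketch of the pool representation used above): freed locks are pushed
// LIFO onto __kmp_lock_pool through the "pool" view of the lock union --
// pool.next chains the free list, and pool.index remembers the table slot so
// that __kmp_user_lock_allocate() can hand the same index back out when
// OMP_LOCK_T_SIZE < sizeof(void *).
//
//     __kmp_lock_pool -> lck_c -> lck_b -> lck_a -> NULL   (most recently freed first)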
3688 
3689 kmp_user_lock_p
3690 __kmp_lookup_user_lock( void **user_lock, char const *func )
3691 {
3692  kmp_user_lock_p lck = NULL;
3693 
3694  if ( __kmp_env_consistency_check ) {
3695  if ( user_lock == NULL ) {
3696  KMP_FATAL( LockIsUninitialized, func );
3697  }
3698  }
3699 
3700  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3701  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3702  if ( __kmp_env_consistency_check ) {
3703  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3704  KMP_FATAL( LockIsUninitialized, func );
3705  }
3706  }
3707  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3708  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3709  lck = __kmp_user_lock_table.table[index];
3710  }
3711  else {
3712  lck = *( (kmp_user_lock_p *)user_lock );
3713  }
3714 
3715  if ( __kmp_env_consistency_check ) {
3716  if ( lck == NULL ) {
3717  KMP_FATAL( LockIsUninitialized, func );
3718  }
3719  }
3720 
3721  return lck;
3722 }
3723 
3724 void
3725 __kmp_cleanup_user_locks( void )
3726 {
3727  //
3728  // Reset the lock pool. Do not worry about the locks in the pool -- we will
3729  // free them while iterating through the lock table (it includes all the
3730  // locks, dead or alive).
3731  //
3732  __kmp_lock_pool = NULL;
3733 
3734 #define IS_CRITICAL(lck) \
3735  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3736  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3737 
3738  //
3739  // Loop through lock table, free all locks.
3740  //
3741  // Do not free item [0], it is reserved for lock tables list.
3742  //
3743  // FIXME - we are iterating through a list of (pointers to) objects of
3744  // type union kmp_user_lock, but we have no way of knowing whether the
3745  // base type is currently "pool" or whatever the global user lock type
3746  // is.
3747  //
3748  // We are relying on the fact that for all of the user lock types
3749  // (except "tas"), the first field in the lock struct is the "initialized"
3750  // field, which is set to the address of the lock object itself when
3751  // the lock is initialized. When the union is of type "pool", the
3752  // first field is a pointer to the next object in the free list, which
3753  // will not be the same address as the object itself.
3754  //
3755  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3756  // will fail for "pool" objects on the free list. This must happen as
3757  // the "location" field of real user locks overlaps the "index" field
3758  // of "pool" objects.
3759  //
3760  // It would be better to run through the free list, and remove all "pool"
3761  // objects from the lock table before executing this loop. However,
3762  // "pool" objects do not always have their index field set (only on
3763  // lin_32e), and I don't want to search the lock table for the address
3764  // of every "pool" object on the free list.
3765  //
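    //
    // Sketch of the distinction relied on here (illustrative; exact layouts
    // are in kmp_lock.h). For a live lock the first word points back at the
    // lock itself ("tas" excepted, as noted above), while for a pooled lock
    // it is the free-list link:
    //
    //     live:   *(void **)lck == lck             -> "initialized" check passes
    //     pooled: *(void **)lck == lck->pool.next  -> check fails, lock is skipped
    //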
3766  while ( __kmp_user_lock_table.used > 1 ) {
3767  const ident *loc;
3768 
3769  //
3770  // Reduce __kmp_user_lock_table.used before freeing the lock,
3771  // so that the state of the lock table stays consistent.
3772  //
3773  kmp_user_lock_p lck = __kmp_user_lock_table.table[
3774  --__kmp_user_lock_table.used ];
3775 
3776  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3777  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3778  //
3779  // Issue a warning if: KMP_CONSISTENCY_CHECK is on AND the lock is
3780  // initialized AND it is NOT a critical section lock (the user is not
3781  // responsible for destroying criticals) AND we know the source
3782  // location to report.
3783  //
3784  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3785  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3786  ( loc->psource != NULL ) ) {
3787  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3788  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3789  str_loc.line, str_loc.col );
3790  __kmp_str_loc_free( &str_loc);
3791  }
3792 
3793 #ifdef KMP_DEBUG
3794  if ( IS_CRITICAL( lck ) ) {
3795  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3796  }
3797  else {
3798  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3799  }
3800 #endif // KMP_DEBUG
3801 
3802  //
3803  // Clean up internal lock dynamic resources
3804  // (for drdpa locks in particular).
3805  //
3806  __kmp_destroy_user_lock( lck );
3807  }
3808 
3809  //
3810  // Free the lock if block allocation of locks is not used.
3811  //
3812  if ( __kmp_lock_blocks == NULL ) {
3813  __kmp_free( lck );
3814  }
3815  }
3816 
3817 #undef IS_CRITICAL
3818 
3819  //
3820  // delete lock table(s).
3821  //
3822  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3823  __kmp_user_lock_table.table = NULL;
3824  __kmp_user_lock_table.allocated = 0;
3825 
3826  while ( table_ptr != NULL ) {
3827  //
3828  // In the first element we saved the pointer to the previous
3829  // (smaller) lock table.
3830  //
3831  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3832  __kmp_free( table_ptr );
3833  table_ptr = next;
3834  }
3835 
3836  //
3837  // Free buffers allocated for blocks of locks.
3838  //
3839  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3840  __kmp_lock_blocks = NULL;
3841 
3842  while ( block_ptr != NULL ) {
3843  kmp_block_of_locks_t *next = block_ptr->next_block;
3844  __kmp_free( block_ptr->locks );
3845  //
3846  // *block_ptr itself was allocated at the end of the locks vector.
3847  //
3848  block_ptr = next;
3849  }
3850 
3851  TCW_4(__kmp_init_user_locks, FALSE);
3852 }
3853 