Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_runtime.c
1 /*
2  * kmp_runtime.c -- KPTS runtime support library
3  * $Revision: 42839 $
4  * $Date: 2013-11-24 13:01:00 -0600 (Sun, 24 Nov 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_atomic.h"
39 #include "kmp_wrapper_getpid.h"
40 #include "kmp_environment.h"
41 #include "kmp_itt.h"
42 #include "kmp_str.h"
43 #include "kmp_settings.h"
44 #include "kmp_i18n.h"
45 #include "kmp_io.h"
46 #include "kmp_error.h"
47 
48 /* these are temporary issues to be dealt with */
49 #define KMP_USE_PRCTL 0
50 #define KMP_USE_POOLED_ALLOC 0
51 
52 #if KMP_MIC
53 #include <immintrin.h>
54 #define USE_NGO_STORES 1
55 #endif // KMP_MIC
56 
57 #if KMP_MIC && USE_NGO_STORES
58 #define load_icvs(src) __m512d Vt_icvs = _mm512_load_pd((void *)(src))
59 #define store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt_icvs)
60 #define sync_icvs() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
61 #else
62 #define load_icvs(src) ((void)0)
63 #define store_icvs(dst, src) copy_icvs((dst), (src))
64 #define sync_icvs() ((void)0)
65 #endif /* KMP_MIC && USE_NGO_STORES */
66 
67 #if KMP_OS_WINDOWS
68 #include <process.h>
69 #endif
70 
71 
72 #if defined(KMP_GOMP_COMPAT)
73 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
74 #endif /* defined(KMP_GOMP_COMPAT) */
75 
76 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
77 #if OMP_40_ENABLED
78  "4.0 (201307)";
79 #elif OMP_30_ENABLED
80  "3.1 (201107)";
81 #else
82  "2.5 (200505)";
83 #endif
84 
85 #ifdef KMP_DEBUG
86 
87 char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
88 
89 char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX "perf v19: "
90 #if KMP_PERF_V19 == KMP_ON
91  "on";
92 #elif KMP_PERF_V19 == KMP_OFF
93  "off";
94 #else
95  #error "Must specify KMP_PERF_V19 option"
96 #endif
97 
98 char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: "
99 #if KMP_PERF_V106 == KMP_ON
100  "on";
101 #elif KMP_PERF_V106 == KMP_OFF
102  "off";
103 #else
104  #error "Must specify KMP_PERF_V106 option"
105 #endif
106 
107 #endif /* KMP_DEBUG */
108 
109 
110 #define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
111 
112 /* ------------------------------------------------------------------------ */
113 /* ------------------------------------------------------------------------ */
114 
115 kmp_info_t __kmp_monitor;
116 
117 /* ------------------------------------------------------------------------ */
118 /* ------------------------------------------------------------------------ */
119 
120 /* Forward declarations */
121 
122 void __kmp_cleanup( void );
123 
124 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
125 static void __kmp_initialize_team(
126  kmp_team_t * team,
127  int new_nproc,
128  #if OMP_30_ENABLED
129  kmp_internal_control_t * new_icvs,
130  ident_t * loc
131  #else
132  int new_set_nproc, int new_set_dynamic, int new_set_nested,
133  int new_set_blocktime, int new_bt_intervals, int new_bt_set
134  #endif // OMP_30_ENABLED
135 );
136 static void __kmp_partition_places( kmp_team_t *team );
137 static void __kmp_do_serial_initialize( void );
138 
139 
140 #ifdef USE_LOAD_BALANCE
141 static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
142 #endif
143 
144 static int __kmp_expand_threads(int nWish, int nNeed);
145 static int __kmp_unregister_root_other_thread( int gtid );
146 static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
147 static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
148 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
149 
150 /* ------------------------------------------------------------------------ */
151 /* ------------------------------------------------------------------------ */
152 
153 /* Calculate the identifier of the current thread */
154 /* fast (and somewhat portable) way to get unique */
155 /* identifier of executing thread. */
156 /* returns KMP_GTID_DNE if we haven't been assigned a gtid */
157 
158 int
159 __kmp_get_global_thread_id( )
160 {
161  int i;
162  kmp_info_t **other_threads;
163  size_t stack_data;
164  char *stack_addr;
165  size_t stack_size;
166  char *stack_base;
167 
168  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
169  __kmp_nth, __kmp_all_nth ));
170 
171  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
172  parallel region, made it return KMP_GTID_DNE to force serial_initialize by
173  caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
174  __kmp_init_gtid for this to work. */
175 
176  if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
177 
178 #ifdef KMP_TDATA_GTID
179  if ( TCR_4(__kmp_gtid_mode) >= 3) {
180  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
181  return __kmp_gtid;
182  }
183 #endif
184  if ( TCR_4(__kmp_gtid_mode) >= 2) {
185  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
186  return __kmp_gtid_get_specific();
187  }
188  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
189 
190  stack_addr = (char*) & stack_data;
191  other_threads = __kmp_threads;
192 
193  /*
194  ATT: The code below is a source of potential bugs due to unsynchronized access to
195  __kmp_threads array. For example:
196  1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
197  2. Current thread is suspended by OS.
198  3. Another thread unregisters and finishes (debug versions of free() may fill memory
199  with something like 0xEF).
200  4. Current thread is resumed.
201  5. Current thread reads junk from *thr.
202  TODO: Fix it.
203  --ln
204  */
205 
206  for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
207 
208  kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
209  if( !thr ) continue;
210 
211  stack_size = (size_t)TCR_PTR(thr -> th.th_info.ds.ds_stacksize);
212  stack_base = (char *)TCR_PTR(thr -> th.th_info.ds.ds_stackbase);
213 
214  /* stack grows down -- search through all of the active threads */
215 
216  if( stack_addr <= stack_base ) {
217  size_t stack_diff = stack_base - stack_addr;
218 
219  if( stack_diff <= stack_size ) {
220  /* The only way we can be closer than the allocated */
221  /* stack size is if we are running on this thread. */
222  KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
223  return i;
224  }
225  }
226  }
227 
228  /* get specific to try and determine our gtid */
229  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
230  "thread, using TLS\n" ));
231  i = __kmp_gtid_get_specific();
232 
233  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */
234 
235  /* if we havn't been assigned a gtid, then return code */
236  if( i<0 ) return i;
237 
238  /* dynamically updated stack window for uber threads to avoid get_specific call */
239  if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
240  KMP_FATAL( StackOverflow, i );
241  }
242 
243  stack_base = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
244  if( stack_addr > stack_base ) {
245  TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
246  TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
247  other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
248  } else {
249  TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
250  }
251 
252  /* Reprint stack bounds for ubermaster since they have been refined */
253  if ( __kmp_storage_map ) {
254  char *stack_end = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
255  char *stack_beg = stack_end - other_threads[i] -> th.th_info.ds.ds_stacksize;
256  __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
257  other_threads[i] -> th.th_info.ds.ds_stacksize,
258  "th_%d stack (refinement)", i );
259  }
260  return i;
261 }
262 
263 int
264 __kmp_get_global_thread_id_reg( )
265 {
266  int gtid;
267 
268  if ( !__kmp_init_serial ) {
269  gtid = KMP_GTID_DNE;
270  } else
271 #ifdef KMP_TDATA_GTID
272  if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
273  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
274  gtid = __kmp_gtid;
275  } else
276 #endif
277  if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
278  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
279  gtid = __kmp_gtid_get_specific();
280  } else {
281  KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
282  gtid = __kmp_get_global_thread_id();
283  }
284 
285  /* we must be a new uber master sibling thread */
286  if( gtid == KMP_GTID_DNE ) {
287  KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
288  "Registering a new gtid.\n" ));
289  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
290  if( !__kmp_init_serial ) {
291  __kmp_do_serial_initialize();
292  gtid = __kmp_gtid_get_specific();
293  } else {
294  gtid = __kmp_register_root(FALSE);
295  }
296  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
297  /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
298  }
299 
300  KMP_DEBUG_ASSERT( gtid >=0 );
301 
302  return gtid;
303 }
304 
305 /* caller must hold forkjoin_lock */
306 void
307 __kmp_check_stack_overlap( kmp_info_t *th )
308 {
309  int f;
310  char *stack_beg = NULL;
311  char *stack_end = NULL;
312  int gtid;
313 
314  KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
315  if ( __kmp_storage_map ) {
316  stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
317  stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
318 
319  gtid = __kmp_gtid_from_thread( th );
320 
321  if (gtid == KMP_GTID_MONITOR) {
322  __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
323  "th_%s stack (%s)", "mon",
324  ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
325  } else {
326  __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
327  "th_%d stack (%s)", gtid,
328  ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
329  }
330  }
331 
332  /* No point in checking ubermaster threads since they use refinement and cannot overlap */
333  if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
334  {
335  KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
336  if ( stack_beg == NULL ) {
337  stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
338  stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
339  }
340 
341  for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
342  kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
343 
344  if( f_th && f_th != th ) {
345  char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
346  char *other_stack_beg = other_stack_end -
347  (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
348  if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
349  (stack_end > other_stack_beg && stack_end < other_stack_end)) {
350 
351  /* Print the other stack values before the abort */
352  if ( __kmp_storage_map )
353  __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
354  (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
355  "th_%d stack (overlapped)",
356  __kmp_gtid_from_thread( f_th ) );
357 
358  __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
359  }
360  }
361  }
362  }
363  KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
364 }
365 
366 
367 /* ------------------------------------------------------------------------ */
368 
369 #ifndef KMP_DEBUG
370 # define __kmp_static_delay( arg ) /* nothing to do */
371 #else
372 
373 static void
374 __kmp_static_delay( int arg )
375 {
376 /* Work around weird code-gen bug that causes assert to trip */
377 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
378  KMP_ASSERT( arg != 0 );
379 # else
380  KMP_ASSERT( arg >= 0 );
381 # endif
382 }
383 #endif /* KMP_DEBUG */
384 
385 static void
386 __kmp_static_yield( int arg )
387 {
388  __kmp_yield( arg );
389 }
390 
391 /*
392  * Spin wait loop that first does pause, then yield, then sleep.
393  * Wait until spinner is equal to checker to exit.
394  *
395  * A thread that calls __kmp_wait_sleep must make certain that another thread
396  * calls __kmp_release to wake it back up up to prevent deadlocks!
397  */
398 
399 void
400 __kmp_wait_sleep( kmp_info_t *this_thr,
401  volatile kmp_uint *spinner,
402  kmp_uint checker,
403  int final_spin
404  USE_ITT_BUILD_ARG (void * itt_sync_obj)
405 )
406 {
407  /* note: we may not belong to a team at this point */
408  register volatile kmp_uint *spin = spinner;
409  register kmp_uint check = checker;
410  register kmp_uint32 spins;
411  register kmp_uint32 hibernate;
412  int th_gtid, th_tid;
413 #if OMP_30_ENABLED
414  int flag = FALSE;
415 #endif /* OMP_30_ENABLED */
416 
417  KMP_FSYNC_SPIN_INIT( spin, NULL );
418  if( TCR_4(*spin) == check ) {
419  KMP_FSYNC_SPIN_ACQUIRED( spin );
420  return;
421  }
422 
423  th_gtid = this_thr->th.th_info.ds.ds_gtid;
424 
425  KA_TRACE( 20, ("__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
426  th_gtid,
427  spin, check ) );
428 
429  /* setup for waiting */
430  KMP_INIT_YIELD( spins );
431 
432  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
433  //
434  // The worker threads cannot rely on the team struct existing at this
435  // point. Use the bt values cached in the thread struct instead.
436  //
437  #ifdef KMP_ADJUST_BLOCKTIME
438  if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
439  /* force immediate suspend if not set by user and more threads than available procs */
440  hibernate = 0;
441  } else {
442  hibernate = this_thr->th.th_team_bt_intervals;
443  }
444  #else
445  hibernate = this_thr->th.th_team_bt_intervals;
446  #endif /* KMP_ADJUST_BLOCKTIME */
447 
448  //
449  // If the blocktime is nonzero, we want to make sure that we spin
450  // wait for the entirety of the specified #intervals, plus up to
451  // one interval more. This increment make certain that this thread
452  // doesn't go to sleep too soon.
453  //
454  if ( hibernate != 0 ) {
455  hibernate++;
456  }
457 
458  //
459  // Add in the current time value.
460  //
461  hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
462 
463  KF_TRACE( 20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
464  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
465  hibernate - __kmp_global.g.g_time.dt.t_value ));
466  }
467 
468  KMP_MB();
469 
470  /* main wait spin loop */
471  while( TCR_4(*spin) != check ) {
472  int in_pool;
473 
474 #if OMP_30_ENABLED
475  //
476  // If the task team is NULL, it means one of things:
477  // 1) A newly-created thread is first being released by
478  // __kmp_fork_barrier(), and its task team has not been set up
479  // yet.
480  // 2) All tasks have been executed to completion, this thread has
481  // decremented the task team's ref ct and possibly deallocated
482  // it, and should no longer reference it.
483  // 3) Tasking is off for this region. This could be because we
484  // are in a serialized region (perhaps the outer one), or else
485  // tasking was manually disabled (KMP_TASKING=0).
486  //
487  kmp_task_team_t * task_team = NULL;
488  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
489  task_team = this_thr->th.th_task_team;
490  if ( task_team != NULL ) {
491  if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
492  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
493  __kmp_unref_task_team( task_team, this_thr );
494  } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
495  __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
496  USE_ITT_BUILD_ARG( itt_sync_obj ), 0);
497  }
498  }; // if
499  }; // if
500 #endif /* OMP_30_ENABLED */
501 
502  KMP_FSYNC_SPIN_PREPARE( spin );
503  if( TCR_4(__kmp_global.g.g_done) ) {
504  if( __kmp_global.g.g_abort )
505  __kmp_abort_thread( );
506  break;
507  }
508 
509  __kmp_static_delay( 1 );
510 
511  /* if we are oversubscribed,
512  or have waited a bit (and KMP_LIBRARY=throughput), then yield */
513  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
514  // TODO: Should it be number of cores instead of thread contexts? Like:
515  // KMP_YIELD( TCR_4(__kmp_nth) > __kmp_ncores );
516  // Need performance improvement data to make the change...
517  KMP_YIELD_SPIN( spins );
518 
519  //
520  // Check if this thread was transferred from a team
521  // to the thread pool (or vice-versa) while spinning.
522  //
523  in_pool = !!TCR_4(this_thr->th.th_in_pool);
524  if ( in_pool != !!this_thr->th.th_active_in_pool ) {
525  if ( in_pool ) {
526  //
527  // recently transferred from team to pool
528  //
529  KMP_TEST_THEN_INC32(
530  (kmp_int32 *) &__kmp_thread_pool_active_nth );
531  this_thr->th.th_active_in_pool = TRUE;
532 
533  //
534  // Here, we cannot assert that
535  //
536  // KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth)
537  // <= __kmp_thread_pool_nth );
538  //
539  // __kmp_thread_pool_nth is inc/dec'd by the master thread
540  // while the fork/join lock is held, whereas
541  // __kmp_thread_pool_active_nth is inc/dec'd asynchronously
542  // by the workers. The two can get out of sync for brief
543  // periods of time.
544  //
545  }
546  else {
547  //
548  // recently transferred from pool to team
549  //
550  KMP_TEST_THEN_DEC32(
551  (kmp_int32 *) &__kmp_thread_pool_active_nth );
552  KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
553  this_thr->th.th_active_in_pool = FALSE;
554  }
555  }
556 
557 #if OMP_30_ENABLED
558  // Don't suspend if there is a likelihood of new tasks being spawned.
559  if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
560  continue;
561  }
562 #endif /* OMP_30_ENABLED */
563 
564  /* Don't suspend if KMP_BLOCKTIME is set to "infinite" */
565  if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
566  continue;
567  }
568 
569  /* if we have waited a bit more, fall asleep */
570  if ( TCR_4( __kmp_global.g.g_time.dt.t_value ) < hibernate ) {
571  continue;
572  }
573 
574  KF_TRACE( 50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
575 
576  __kmp_suspend( th_gtid, spin, check );
577 
578  if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
579  __kmp_abort_thread( );
580  }
581 
582  /* TODO */
583  /* if thread is done with work and timesout, disband/free */
584  }
585 
586  KMP_FSYNC_SPIN_ACQUIRED( spin );
587 }
588 
589 
590 /*
591  * Release the thread specified by target_thr from waiting by setting the location
592  * specified by spin and resume the thread if indicated by the sleep parameter.
593  *
594  * A thread that calls __kmp_wait_sleep must call this function to wake up the
595  * potentially sleeping thread and prevent deadlocks!
596  */
597 
598 void
599 __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
600  enum kmp_mem_fence_type fetchadd_fence )
601 {
602  kmp_uint old_spin;
603  #ifdef KMP_DEBUG
604  int target_gtid = target_thr->th.th_info.ds.ds_gtid;
605  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
606  #endif
607 
608  KF_TRACE( 20, ( "__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
609  gtid, target_gtid, spin, fetchadd_fence ));
610 
611  KMP_DEBUG_ASSERT( spin );
612 
613  KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
614  fetchadd_fence == kmp_release_fence );
615 
616  KMP_FSYNC_RELEASING( spin );
617 
618  old_spin = ( fetchadd_fence == kmp_acquire_fence )
619  ? KMP_TEST_THEN_ADD4_ACQ32( (volatile kmp_int32 *) spin )
620  : KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );
621 
622  KF_TRACE( 100, ( "__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
623  gtid, spin, old_spin, *spin ) );
624 
625  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
626  /* Only need to check sleep stuff if infinite block time not set */
627  if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
628  #ifndef KMP_DEBUG
629  int target_gtid = target_thr->th.th_info.ds.ds_gtid;
630  #endif
631  /* wake up thread if needed */
632  KF_TRACE( 50, ( "__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
633  gtid, target_gtid, spin ));
634  __kmp_resume( target_gtid, spin );
635  } else {
636  KF_TRACE( 50, ( "__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
637  gtid, target_gtid, spin ));
638  }
639  }
640 }
641 
642 /* ------------------------------------------------------------------------ */
643 
644 void
645 __kmp_infinite_loop( void )
646 {
647  static int done = FALSE;
648 
649  while (! done) {
650  KMP_YIELD( 1 );
651  }
652 }
653 
654 #define MAX_MESSAGE 512
655 
656 void
657 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
658  char buffer[MAX_MESSAGE];
659  int node;
660  va_list ap;
661 
662  va_start( ap, format);
663  sprintf( buffer, "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
664  __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
665  __kmp_vprintf( kmp_err, buffer, ap );
666 #if KMP_PRINT_DATA_PLACEMENT
667  if(gtid >= 0) {
668  if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
669  if( __kmp_storage_map_verbose ) {
670  node = __kmp_get_host_node(p1);
671  if(node < 0) /* doesn't work, so don't try this next time */
672  __kmp_storage_map_verbose = FALSE;
673  else {
674  char *last;
675  int lastNode;
676  int localProc = __kmp_get_cpu_from_gtid(gtid);
677 
678  p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
679  p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
680  if(localProc >= 0)
681  __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
682  else
683  __kmp_printf_no_lock(" GTID %d\n", gtid);
684 # if KMP_USE_PRCTL
685 /* The more elaborate format is disabled for now because of the prctl hanging bug. */
686  do {
687  last = p1;
688  lastNode = node;
689  /* This loop collates adjacent pages with the same host node. */
690  do {
691  (char*)p1 += PAGE_SIZE;
692  } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
693  __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
694  (char*)p1 - 1, lastNode);
695  } while(p1 <= p2);
696 # else
697  __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
698  (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
699  if(p1 < p2) {
700  __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
701  (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
702  }
703 # endif
704  }
705  }
706  } else
707  __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
708  }
709 #endif /* KMP_PRINT_DATA_PLACEMENT */
710  __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
711 }
712 
713 void
714 __kmp_warn( char const * format, ... )
715 {
716  char buffer[MAX_MESSAGE];
717  va_list ap;
718 
719  if ( __kmp_generate_warnings == kmp_warnings_off ) {
720  return;
721  }
722 
723  va_start( ap, format );
724 
725  snprintf( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
726  __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
727  __kmp_vprintf( kmp_err, buffer, ap );
728  __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
729 
730  va_end( ap );
731 }
732 
733 void
734 __kmp_abort_process()
735 {
736 
737  // Later threads may stall here, but that's ok because abort() will kill them.
738  __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
739 
740  if ( __kmp_debug_buf ) {
741  __kmp_dump_debug_buffer();
742  }; // if
743 
744  if ( KMP_OS_WINDOWS ) {
745  // Let other threads know of abnormal termination and prevent deadlock
746  // if abort happened during library initialization or shutdown
747  __kmp_global.g.g_abort = SIGABRT;
748 
749  /*
750  On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing.
751  Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
752  works well, but this function is not available in VS7 (this is not problem for DLL, but
753  it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
754  not help, at least in some versions of MS C RTL.
755 
756  It seems following sequence is the only way to simulate abort() and avoid pop-up error
757  box.
758  */
759  raise( SIGABRT );
760  _exit( 3 ); // Just in case, if signal ignored, exit anyway.
761  } else {
762  abort();
763  }; // if
764 
765  __kmp_infinite_loop();
766  __kmp_release_bootstrap_lock( & __kmp_exit_lock );
767 
768 } // __kmp_abort_process
769 
770 void
771 __kmp_abort_thread( void )
772 {
773  // TODO: Eliminate g_abort global variable and this function.
774  // In case of abort just call abort(), it will kill all the threads.
775  __kmp_infinite_loop();
776 } // __kmp_abort_thread
777 
778 /* ------------------------------------------------------------------------ */
779 
780 /*
781  * Print out the storage map for the major kmp_info_t thread data structures
782  * that are allocated together.
783  */
784 
785 static void
786 __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
787 {
788  __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
789 
790  __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
791  "th_%d.th_info", gtid );
792 
793  __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
794  "th_%d.th_local", gtid );
795 
796  __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
797  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
798 
799  __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
800  &thr->th.th_bar[bs_plain_barrier+1],
801  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
802 
803  __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
804  &thr->th.th_bar[bs_forkjoin_barrier+1],
805  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
806 
807  #if KMP_FAST_REDUCTION_BARRIER
808  __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
809  &thr->th.th_bar[bs_reduction_barrier+1],
810  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
811  #endif // KMP_FAST_REDUCTION_BARRIER
812 }
813 
814 /*
815  * Print out the storage map for the major kmp_team_t team data structures
816  * that are allocated together.
817  */
818 
819 static void
820 __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
821 {
822  int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
823  __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
824  header, team_id );
825 
826  __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
827  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
828 
829 
830  __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
831  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
832 
833  __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
834  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
835 
836  #if KMP_FAST_REDUCTION_BARRIER
837  __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
838  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
839  #endif // KMP_FAST_REDUCTION_BARRIER
840 
841  __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
842  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
843 
844  __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
845  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
846 
847  __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
848  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
849  header, team_id );
850 
851  /*
852  __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr],
853  sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
854 
855  __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr],
856  sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
857 
858  __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr],
859  sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
860 
861  __kmp_print_storage_map_gtid( -1, &team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr],
862  sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
863 
864  __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr],
865  sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
866 
867  __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr],
868  sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
869 
870 #if OMP_30_ENABLED
871  //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr],
872  // sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id );
873 
874  __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr],
875  sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id );
876 #endif // OMP_30_ENABLED
877 #if OMP_40_ENABLED
878  __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr],
879  sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id );
880 #endif
881  */
882 
883  __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
884  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
885 }
886 
887 static void __kmp_init_allocator() {}
888 static void __kmp_fini_allocator() {}
889 static void __kmp_fini_allocator_thread() {}
890 
891 /* ------------------------------------------------------------------------ */
892 
893 #ifdef GUIDEDLL_EXPORTS
894 # if KMP_OS_WINDOWS
895 
896 
897 static void
898 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
899  // TODO: Change to __kmp_break_bootstrap_lock().
900  __kmp_init_bootstrap_lock( lck ); // make the lock released
901 }
902 
903 static void
904 __kmp_reset_locks_on_process_detach( int gtid_req ) {
905  int i;
906  int thread_count;
907 
908  // PROCESS_DETACH is expected to be called by a thread
909  // that executes ProcessExit() or FreeLibrary().
910  // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
911  // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
912  // However, in fact, some threads can be still alive here, although being about to be terminated.
913  // The threads in the array with ds_thread==0 are most suspicious.
914  // Actually, it can be not safe to access the __kmp_threads[].
915 
916  // TODO: does it make sense to check __kmp_roots[] ?
917 
918  // Let's check that there are no other alive threads registered with the OMP lib.
919  while( 1 ) {
920  thread_count = 0;
921  for( i = 0; i < __kmp_threads_capacity; ++i ) {
922  if( !__kmp_threads ) continue;
923  kmp_info_t* th = __kmp_threads[ i ];
924  if( th == NULL ) continue;
925  int gtid = th->th.th_info.ds.ds_gtid;
926  if( gtid == gtid_req ) continue;
927  if( gtid < 0 ) continue;
928  DWORD exit_val;
929  int alive = __kmp_is_thread_alive( th, &exit_val );
930  if( alive ) {
931  ++thread_count;
932  }
933  }
934  if( thread_count == 0 ) break; // success
935  }
936 
937  // Assume that I'm alone.
938 
939  // Now it might be probably safe to check and reset locks.
940  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
941  __kmp_reset_lock( &__kmp_forkjoin_lock );
942  #ifdef KMP_DEBUG
943  __kmp_reset_lock( &__kmp_stdio_lock );
944  #endif // KMP_DEBUG
945 
946 
947 }
948 
949 BOOL WINAPI
950 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
951  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
952 
953  switch( fdwReason ) {
954 
955  case DLL_PROCESS_ATTACH:
956  KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
957 
958  return TRUE;
959 
960  case DLL_PROCESS_DETACH:
961  KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
962  __kmp_gtid_get_specific() ));
963 
964  if( lpReserved != NULL )
965  {
966  // lpReserved is used for telling the difference:
967  // lpReserved == NULL when FreeLibrary() was called,
968  // lpReserved != NULL when the process terminates.
969  // When FreeLibrary() is called, worker threads remain alive.
970  // So they will release the forkjoin lock by themselves.
971  // When the process terminates, worker threads disappear triggering
972  // the problem of unreleased forkjoin lock as described below.
973 
974  // A worker thread can take the forkjoin lock
975  // in __kmp_suspend()->__kmp_rml_decrease_load_before_sleep().
976  // The problem comes up if that worker thread becomes dead
977  // before it releases the forkjoin lock.
978  // The forkjoin lock remains taken, while the thread
979  // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
980  // will try to take the forkjoin lock and will always fail,
981  // so that the application will never finish [normally].
982  // This scenario is possible if __kmpc_end() has not been executed.
983  // It looks like it's not a corner case, but common cases:
984  // - the main function was compiled by an alternative compiler;
985  // - the main function was compiled by icl but without /Qopenmp (application with plugins);
986  // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
987  // - alive foreign thread prevented __kmpc_end from doing cleanup.
988 
989  // This is a hack to work around the problem.
990  // TODO: !!! to figure out something better.
991  __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
992  }
993 
994  __kmp_internal_end_library( __kmp_gtid_get_specific() );
995 
996  return TRUE;
997 
998  case DLL_THREAD_ATTACH:
999  KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
1000 
1001  /* if we wanted to register new siblings all the time here call
1002  * __kmp_get_gtid(); */
1003  return TRUE;
1004 
1005  case DLL_THREAD_DETACH:
1006  KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
1007  __kmp_gtid_get_specific() ));
1008 
1009  __kmp_internal_end_thread( __kmp_gtid_get_specific() );
1010  return TRUE;
1011  }
1012 
1013  return TRUE;
1014 }
1015 
1016 # endif /* KMP_OS_WINDOWS */
1017 #endif /* GUIDEDLL_EXPORTS */
1018 
1019 
1020 /* ------------------------------------------------------------------------ */
1021 
1022 /* Change the library type to "status" and return the old type */
1023 /* called from within initialization routines where __kmp_initz_lock is held */
1024 int
1025 __kmp_change_library( int status )
1026 {
1027  int old_status;
1028 
1029  old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count)
1030 
1031  if (status) {
1032  __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
1033  }
1034  else {
1035  __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
1036  }
1037 
1038  return old_status; // return previous setting of whether KMP_LIBRARY=throughput
1039 }
1040 
1041 /* ------------------------------------------------------------------------ */
1042 /* ------------------------------------------------------------------------ */
1043 
1044 /* __kmp_parallel_deo --
1045  * Wait until it's our turn.
1046  */
1047 void
1048 __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1049 {
1050  int gtid = *gtid_ref;
1051 #ifdef BUILD_PARALLEL_ORDERED
1052  kmp_team_t *team = __kmp_team_from_gtid( gtid );
1053 #endif /* BUILD_PARALLEL_ORDERED */
1054 
1055  if( __kmp_env_consistency_check ) {
1056  if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1057  __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
1058  }
1059 #ifdef BUILD_PARALLEL_ORDERED
1060  if( !team -> t.t_serialized ) {
1061  kmp_uint32 spins;
1062 
1063  KMP_MB();
1064  KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
1065  KMP_MB();
1066  }
1067 #endif /* BUILD_PARALLEL_ORDERED */
1068 }
1069 
1070 /* __kmp_parallel_dxo --
1071  * Signal the next task.
1072  */
1073 
1074 void
1075 __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1076 {
1077  int gtid = *gtid_ref;
1078 #ifdef BUILD_PARALLEL_ORDERED
1079  int tid = __kmp_tid_from_gtid( gtid );
1080  kmp_team_t *team = __kmp_team_from_gtid( gtid );
1081 #endif /* BUILD_PARALLEL_ORDERED */
1082 
1083  if( __kmp_env_consistency_check ) {
1084  if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1085  __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
1086  }
1087 #ifdef BUILD_PARALLEL_ORDERED
1088  if ( ! team -> t.t_serialized ) {
1089  KMP_MB(); /* Flush all pending memory write invalidates. */
1090 
1091  /* use the tid of the next thread in this team */
1092  /* TODO repleace with general release procedure */
1093  team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
1094 
1095  KMP_MB(); /* Flush all pending memory write invalidates. */
1096  }
1097 #endif /* BUILD_PARALLEL_ORDERED */
1098 }
1099 
1100 /* ------------------------------------------------------------------------ */
1101 /* ------------------------------------------------------------------------ */
1102 
1103 /* ------------------------------------------------------------------------ */
1104 /* ------------------------------------------------------------------------ */
1105 
1106 /* The BARRIER for a SINGLE process section is always explicit */
1107 
1108 int
1109 __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
1110 {
1111  int status;
1112  kmp_info_t *th;
1113  kmp_team_t *team;
1114 
1115  if( ! TCR_4(__kmp_init_parallel) )
1116  __kmp_parallel_initialize();
1117 
1118  th = __kmp_threads[ gtid ];
1119  team = th -> th.th_team;
1120  status = 0;
1121 
1122  th->th.th_ident = id_ref;
1123 
1124  if ( team -> t.t_serialized ) {
1125  status = 1;
1126  } else {
1127  kmp_int32 old_this = th->th.th_local.this_construct;
1128 
1129  ++th->th.th_local.this_construct;
1130  /* try to set team count to thread count--success means thread got the
1131  single block
1132  */
1133  /* TODO: Should this be acquire or release? */
1134  status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
1135  th->th.th_local.this_construct);
1136  }
1137 
1138  if( __kmp_env_consistency_check ) {
1139  if (status && push_ws) {
1140  __kmp_push_workshare( gtid, ct_psingle, id_ref );
1141  } else {
1142  __kmp_check_workshare( gtid, ct_psingle, id_ref );
1143  }
1144  }
1145 #if USE_ITT_BUILD
1146  if ( status ) {
1147  __kmp_itt_single_start( gtid );
1148  }
1149 #endif /* USE_ITT_BUILD */
1150  return status;
1151 }
1152 
1153 void
1154 __kmp_exit_single( int gtid )
1155 {
1156 #if USE_ITT_BUILD
1157  __kmp_itt_single_end( gtid );
1158 #endif /* USE_ITT_BUILD */
1159  if( __kmp_env_consistency_check )
1160  __kmp_pop_workshare( gtid, ct_psingle, NULL );
1161 }
1162 
1163 
1164 /* ------------------------------------------------------------------------ */
1165 /* ------------------------------------------------------------------------ */
1166 
1167 static void
1168 __kmp_linear_barrier_gather( enum barrier_type bt,
1169  kmp_info_t *this_thr,
1170  int gtid,
1171  int tid,
1172  void (*reduce)(void *, void *)
1173  USE_ITT_BUILD_ARG(void * itt_sync_obj)
1174  )
1175 {
1176  register kmp_team_t *team = this_thr -> th.th_team;
1177  register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1178  register kmp_info_t **other_threads = team -> t.t_threads;
1179 
1180  KA_TRACE( 20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1181  gtid, team->t.t_id, tid, bt ) );
1182 
1183  KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1184 
1185  /*
1186  * We now perform a linear reduction to signal that all
1187  * of the threads have arrived.
1188  *
1189  * Collect all the worker team member threads.
1190  */
1191  if ( ! KMP_MASTER_TID( tid )) {
1192 
1193  KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
1194  "arrived(%p): %u => %u\n",
1195  gtid, team->t.t_id, tid,
1196  __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1197  &thr_bar -> b_arrived, thr_bar -> b_arrived,
1198  thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1199  ) );
1200 
1201  /* mark arrival to master thread */
1202  //
1203  // After performing this write, a worker thread may not assume that
1204  // the team is valid any more - it could be deallocated by the master
1205  // thread at any time.
1206  //
1207  __kmp_release( other_threads[0], &thr_bar -> b_arrived, kmp_release_fence );
1208 
1209  } else {
1210  register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
1211  register int nproc = this_thr -> th.th_team_nproc;
1212  register int i;
1213  /* Don't have to worry about sleep bit here or atomic since team setting */
1214  register kmp_uint new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
1215 
1216  /* Collect all the worker team member threads. */
1217  for (i = 1; i < nproc; i++) {
1218 #if KMP_CACHE_MANAGE
1219  /* prefetch next thread's arrived count */
1220  if ( i+1 < nproc )
1221  KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1222 #endif /* KMP_CACHE_MANAGE */
1223  KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
1224  "arrived(%p) == %u\n",
1225  gtid, team->t.t_id, tid,
1226  __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1227  &other_threads[i] -> th.th_bar[ bt ].bb.b_arrived,
1228  new_state ) );
1229 
1230  /* wait for worker thread to arrive */
1231  __kmp_wait_sleep( this_thr,
1232  & other_threads[ i ] -> th.th_bar[ bt ].bb.b_arrived,
1233  new_state, FALSE
1234  USE_ITT_BUILD_ARG( itt_sync_obj )
1235  );
1236 
1237  if (reduce) {
1238 
1239  KA_TRACE( 100, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
1240  gtid, team->t.t_id, tid,
1241  __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
1242 
1243  (*reduce)( this_thr -> th.th_local.reduce_data,
1244  other_threads[ i ] -> th.th_local.reduce_data );
1245 
1246  }
1247 
1248  }
1249 
1250  /* Don't have to worry about sleep bit here or atomic since team setting */
1251  team_bar -> b_arrived = new_state;
1252  KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
1253  "arrived(%p) = %u\n",
1254  gtid, team->t.t_id, tid, team->t.t_id,
1255  &team_bar -> b_arrived, new_state ) );
1256  }
1257 
1258  KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1259  gtid, team->t.t_id, tid, bt ) );
1260 }
1261 
1262 
1263 static void
1264 __kmp_tree_barrier_gather( enum barrier_type bt,
1265  kmp_info_t *this_thr,
1266  int gtid,
1267  int tid,
1268  void (*reduce) (void *, void *)
1269  USE_ITT_BUILD_ARG( void * itt_sync_obj )
1270  )
1271 {
1272  register kmp_team_t *team = this_thr -> th.th_team;
1273  register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1274  register kmp_info_t **other_threads = team -> t.t_threads;
1275  register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1276  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1277  register kmp_uint32 branch_factor = 1 << branch_bits ;
1278  register kmp_uint32 child;
1279  register kmp_uint32 child_tid;
1280  register kmp_uint new_state;
1281 
1282  KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1283  gtid, team->t.t_id, tid, bt ) );
1284 
1285  KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1286 
1287  /*
1288  * We now perform a tree gather to wait until all
1289  * of the threads have arrived, and reduce any required data
1290  * as we go.
1291  */
1292 
1293  child_tid = (tid << branch_bits) + 1;
1294 
1295  if ( child_tid < nproc ) {
1296 
1297  /* parent threads wait for all their children to arrive */
1298  new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1299  child = 1;
1300 
1301  do {
1302  register kmp_info_t *child_thr = other_threads[ child_tid ];
1303  register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1304 #if KMP_CACHE_MANAGE
1305  /* prefetch next thread's arrived count */
1306  if ( child+1 <= branch_factor && child_tid+1 < nproc )
1307  KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1308 #endif /* KMP_CACHE_MANAGE */
1309  KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1310  "arrived(%p) == %u\n",
1311  gtid, team->t.t_id, tid,
1312  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1313  &child_bar -> b_arrived, new_state ) );
1314 
1315  /* wait for child to arrive */
1316  __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1317  USE_ITT_BUILD_ARG( itt_sync_obj)
1318  );
1319 
1320  if (reduce) {
1321 
1322  KA_TRACE( 100, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1323  gtid, team->t.t_id, tid,
1324  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1325  child_tid ) );
1326 
1327  (*reduce)( this_thr -> th.th_local.reduce_data,
1328  child_thr -> th.th_local.reduce_data );
1329 
1330  }
1331 
1332  child++;
1333  child_tid++;
1334  }
1335  while ( child <= branch_factor && child_tid < nproc );
1336  }
1337 
1338  if ( !KMP_MASTER_TID(tid) ) {
1339  /* worker threads */
1340  register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
1341 
1342  KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1343  "arrived(%p): %u => %u\n",
1344  gtid, team->t.t_id, tid,
1345  __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1346  &thr_bar -> b_arrived, thr_bar -> b_arrived,
1347  thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1348  ) );
1349 
1350  /* mark arrival to parent thread */
1351  //
1352  // After performing this write, a worker thread may not assume that
1353  // the team is valid any more - it could be deallocated by the master
1354  // thread at any time.
1355  //
1356  __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
1357 
1358  } else {
1359  /* Need to update the team arrived pointer if we are the master thread */
1360 
1361  if ( nproc > 1 )
1362  /* New value was already computed above */
1363  team -> t.t_bar[ bt ].b_arrived = new_state;
1364  else
1365  team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1366 
1367  KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1368  gtid, team->t.t_id, tid, team->t.t_id,
1369  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1370  }
1371 
1372  KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1373  gtid, team->t.t_id, tid, bt ) );
1374 }
1375 
1376 
1377 static void
1378 __kmp_hyper_barrier_gather( enum barrier_type bt,
1379  kmp_info_t *this_thr,
1380  int gtid,
1381  int tid,
1382  void (*reduce) (void *, void *)
1383  USE_ITT_BUILD_ARG (void * itt_sync_obj)
1384  )
1385 {
1386  register kmp_team_t *team = this_thr -> th.th_team;
1387  register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1388  register kmp_info_t **other_threads = team -> t.t_threads;
1389  register kmp_uint new_state = KMP_BARRIER_UNUSED_STATE;
1390  register kmp_uint32 num_threads = this_thr -> th.th_team_nproc;
1391  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1392  register kmp_uint32 branch_factor = 1 << branch_bits ;
1393  register kmp_uint32 offset;
1394  register kmp_uint32 level;
1395 
1396  KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1397  gtid, team->t.t_id, tid, bt ) );
1398 
1399  KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1400 
1401 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1402  // Barrier imbalance - save arrive time to the thread
1403  if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
1404  this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
1405  }
1406 #endif
1407  /*
1408  * We now perform a hypercube-embedded tree gather to wait until all
1409  * of the threads have arrived, and reduce any required data
1410  * as we go.
1411  */
1412 
1413  for ( level=0, offset =1;
1414  offset < num_threads;
1415  level += branch_bits, offset <<= branch_bits )
1416  {
1417  register kmp_uint32 child;
1418  register kmp_uint32 child_tid;
1419 
1420  if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
1421  register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
1422 
1423  KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1424  "arrived(%p): %u => %u\n",
1425  gtid, team->t.t_id, tid,
1426  __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1427  &thr_bar -> b_arrived, thr_bar -> b_arrived,
1428  thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1429  ) );
1430 
1431  /* mark arrival to parent thread */
1432  //
1433  // After performing this write (in the last iteration of the
1434  // enclosing for loop), a worker thread may not assume that the
1435  // team is valid any more - it could be deallocated by the master
1436  // thread at any time.
1437  //
1438  __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
1439  break;
1440  }
1441 
1442  /* parent threads wait for children to arrive */
1443 
1444  if (new_state == KMP_BARRIER_UNUSED_STATE)
1445  new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1446 
1447  for ( child = 1, child_tid = tid + (1 << level);
1448  child < branch_factor && child_tid < num_threads;
1449  child++, child_tid += (1 << level) )
1450  {
1451  register kmp_info_t *child_thr = other_threads[ child_tid ];
1452  register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1453 #if KMP_CACHE_MANAGE
1454  register kmp_uint32 next_child_tid = child_tid + (1 << level);
1455  /* prefetch next thread's arrived count */
1456  if ( child+1 < branch_factor && next_child_tid < num_threads )
1457  KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
1458 #endif /* KMP_CACHE_MANAGE */
1459  KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1460  "arrived(%p) == %u\n",
1461  gtid, team->t.t_id, tid,
1462  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1463  &child_bar -> b_arrived, new_state ) );
1464 
1465  /* wait for child to arrive */
1466  __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1467  USE_ITT_BUILD_ARG (itt_sync_obj)
1468  );
1469 
1470 #if USE_ITT_BUILD
1471  // Barrier imbalance - write min of the thread time and a child time to the thread.
1472  if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
1473  this_thr->th.th_bar_arrive_time = KMP_MIN( this_thr->th.th_bar_arrive_time, child_thr->th.th_bar_arrive_time );
1474  }
1475 #endif
1476  if (reduce) {
1477 
1478  KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1479  gtid, team->t.t_id, tid,
1480  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1481  child_tid ) );
1482 
1483  (*reduce)( this_thr -> th.th_local.reduce_data,
1484  child_thr -> th.th_local.reduce_data );
1485 
1486  }
1487  }
1488  }
1489 
1490 
1491  if ( KMP_MASTER_TID(tid) ) {
1492  /* Need to update the team arrived pointer if we are the master thread */
1493 
1494  if (new_state == KMP_BARRIER_UNUSED_STATE)
1495  team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1496  else
1497  team -> t.t_bar[ bt ].b_arrived = new_state;
1498 
1499  KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1500  gtid, team->t.t_id, tid, team->t.t_id,
1501  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1502  }
1503 
1504  KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1505  gtid, team->t.t_id, tid, bt ) );
1506 
1507 }
1508 
1509 static void
1510 __kmp_linear_barrier_release( enum barrier_type bt,
1511  kmp_info_t *this_thr,
1512  int gtid,
1513  int tid,
1514  int propagate_icvs
1515  USE_ITT_BUILD_ARG(void * itt_sync_obj)
1516  )
1517 {
1518  register kmp_bstate_t *thr_bar = &this_thr -> th.th_bar[ bt ].bb;
1519  register kmp_team_t *team;
1520 
1521  if (KMP_MASTER_TID( tid )) {
1522  register unsigned int i;
1523  register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1524  register kmp_info_t **other_threads;
1525 
1526  team = __kmp_threads[ gtid ]-> th.th_team;
1527  KMP_DEBUG_ASSERT( team != NULL );
1528  other_threads = team -> t.t_threads;
1529 
1530  KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1531  gtid, team->t.t_id, tid, bt ) );
1532 
1533  if (nproc > 1) {
1534 #if KMP_BARRIER_ICV_PUSH
1535  if ( propagate_icvs ) {
1536  load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1537  for (i = 1; i < nproc; i++) {
1538  __kmp_init_implicit_task( team->t.t_ident,
1539  team->t.t_threads[i], team, i, FALSE );
1540  store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1541  }
1542  sync_icvs();
1543  }
1544 #endif // KMP_BARRIER_ICV_PUSH
1545 
1546  /* Now, release all of the worker threads */
1547  for (i = 1; i < nproc; i++) {
1548 #if KMP_CACHE_MANAGE
1549  /* prefetch next thread's go flag */
1550  if( i+1 < nproc )
1551  KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
1552 #endif /* KMP_CACHE_MANAGE */
1553  KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
1554  "go(%p): %u => %u\n",
1555  gtid, team->t.t_id, tid,
1556  other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
1557  &other_threads[i]->th.th_bar[bt].bb.b_go,
1558  other_threads[i]->th.th_bar[bt].bb.b_go,
1559  other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
1560  ) );
1561 
1562  __kmp_release( other_threads[ i ],
1563  &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
1564  }
1565  }
1566  } else {
1567  /* Wait for the MASTER thread to release us */
1568 
1569  KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
1570  gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1571 
1572  __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1573  USE_ITT_BUILD_ARG(itt_sync_obj)
1574  );
1575 
1576 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1577  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1578  // we are on a fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
1579  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1580  // cancel wait on previous parallel region...
1581  __kmp_itt_task_starting( itt_sync_obj );
1582 
1583  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1584  return;
1585 
1586  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1587  if ( itt_sync_obj != NULL )
1588  __kmp_itt_task_finished( itt_sync_obj ); // call prepare as early as possible for "new" barrier
1589 
1590  } else
1591 #endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */
1592  //
1593  // early exit for reaping threads releasing forkjoin barrier
1594  //
1595  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1596  return;
1597 
1598  //
1599  // The worker thread may now assume that the team is valid.
1600  //
1601 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1602  // libguide only code (cannot use *itt_task* routines)
1603  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1604  // we are on a fork barrier where we could not get the object reliably
1605  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1606  __kmp_itt_barrier_starting( gtid, itt_sync_obj ); // no need to call releasing, but we have paired calls...
1607  }
1608 #endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */
1609  #ifdef KMP_DEBUG
1610  tid = __kmp_tid_from_gtid( gtid );
1611  team = __kmp_threads[ gtid ]-> th.th_team;
1612  #endif
1613  KMP_DEBUG_ASSERT( team != NULL );
1614 
1615  TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1616  KA_TRACE( 20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1617  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1618 
1619  KMP_MB(); /* Flush all pending memory write invalidates. */
1620  }
1621 
1622  KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1623  gtid, team->t.t_id, tid, bt ) );
1624 }
1625 
1626 
1627 static void
1628 __kmp_tree_barrier_release( enum barrier_type bt,
1629  kmp_info_t *this_thr,
1630  int gtid,
1631  int tid,
1632  int propagate_icvs
1633  USE_ITT_BUILD_ARG(void * itt_sync_obj)
1634  )
1635 {
1636  /* handle fork barrier workers who aren't part of a team yet */
1637  register kmp_team_t *team;
1638  register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1639  register kmp_uint32 nproc;
1640  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1641  register kmp_uint32 branch_factor = 1 << branch_bits ;
1642  register kmp_uint32 child;
1643  register kmp_uint32 child_tid;
1644 
1645  /*
1646  * We now perform a tree release for all
1647  * of the threads that have been gathered
1648  */
1649 
1650  if ( ! KMP_MASTER_TID( tid )) {
1651  /* worker threads */
1652 
1653  KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
1654  gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1655 
1656  /* wait for parent thread to release us */
1657  __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1658  USE_ITT_BUILD_ARG(itt_sync_obj)
1659  );
1660 
1661 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1662  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1663  // we are on a fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
1664  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1665  // cancel wait on previous parallel region...
1666  __kmp_itt_task_starting( itt_sync_obj );
1667 
1668  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1669  return;
1670 
1671  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1672  if ( itt_sync_obj != NULL )
1673  __kmp_itt_task_finished( itt_sync_obj ); // call prepare as early as possible for "new" barrier
1674 
1675  } else
1676 #endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */
1677  //
1678  // early exit for reaping threads releasing forkjoin barrier
1679  //
1680  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1681  return;
1682 
1683  //
1684  // The worker thread may now assume that the team is valid.
1685  //
1686 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1687  // libguide only code (cannot use *itt_task* routines)
1688  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1689  // we are on a fork barrier where we could not get the object reliably
1690  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1691  __kmp_itt_barrier_starting( gtid, itt_sync_obj ); // no need to call releasing, but we have paired calls...
1692  }
1693 #endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */
1694  team = __kmp_threads[ gtid ]-> th.th_team;
1695  KMP_DEBUG_ASSERT( team != NULL );
1696  tid = __kmp_tid_from_gtid( gtid );
1697 
1698  TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1699  KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1700  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1701 
1702  KMP_MB(); /* Flush all pending memory write invalidates. */
1703 
1704  } else {
1705  team = __kmp_threads[ gtid ]-> th.th_team;
1706  KMP_DEBUG_ASSERT( team != NULL );
1707 
1708  KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1709  gtid, team->t.t_id, tid, bt ) );
1710  }
1711 
1712  nproc = this_thr -> th.th_team_nproc;
1713  child_tid = ( tid << branch_bits ) + 1;
1714 
1715  if ( child_tid < nproc ) {
1716  register kmp_info_t **other_threads = team -> t.t_threads;
1717  child = 1;
1718  /* parent threads release all their children */
1719 
1720  do {
1721  register kmp_info_t *child_thr = other_threads[ child_tid ];
1722  register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1723 #if KMP_CACHE_MANAGE
1724  /* prefetch next thread's go count */
1725  if ( child+1 <= branch_factor && child_tid+1 < nproc )
1726  KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
1727 #endif /* KMP_CACHE_MANAGE */
1728 
1729 #if KMP_BARRIER_ICV_PUSH
1730  if ( propagate_icvs ) {
1731  __kmp_init_implicit_task( team->t.t_ident,
1732  team->t.t_threads[child_tid], team, child_tid, FALSE );
1733  load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1734  store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1735  sync_icvs();
1736  }
1737 #endif // KMP_BARRIER_ICV_PUSH
1738 
1739  KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1740  "go(%p): %u => %u\n",
1741  gtid, team->t.t_id, tid,
1742  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1743  child_tid, &child_bar -> b_go, child_bar -> b_go,
1744  child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1745 
1746  /* release child from barrier */
1747  __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1748 
1749  child++;
1750  child_tid++;
1751  }
1752  while ( child <= branch_factor && child_tid < nproc );
1753  }
1754 
1755  KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1756  gtid, team->t.t_id, tid, bt ) );
1757 }
1758 
1759 /* The reverse versions seem to beat the forward versions overall */
1760 #define KMP_REVERSE_HYPER_BAR
1761 static void
1762 __kmp_hyper_barrier_release( enum barrier_type bt,
1763  kmp_info_t *this_thr,
1764  int gtid,
1765  int tid,
1766  int propagate_icvs
1767  USE_ITT_BUILD_ARG(void * itt_sync_obj)
1768  )
1769 {
1770  /* handle fork barrier workers who aren't part of a team yet */
1771  register kmp_team_t *team;
1772  register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1773  register kmp_info_t **other_threads;
1774  register kmp_uint32 num_threads;
1775  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1776  register kmp_uint32 branch_factor = 1 << branch_bits;
1777  register kmp_uint32 child;
1778  register kmp_uint32 child_tid;
1779  register kmp_uint32 offset;
1780  register kmp_uint32 level;
1781 
1782  /* Perform a hypercube-embedded tree release for all of the threads
1783  that have been gathered. If KMP_REVERSE_HYPER_BAR is defined (default)
1784  the threads are released in the reverse order of the corresponding gather,
1785  otherwise threads are released in the same order. */
1786 
1787  if ( ! KMP_MASTER_TID( tid )) {
1788  /* worker threads */
1789  KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
1790  gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1791 
1792  /* wait for parent thread to release us */
1793  __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1794  USE_ITT_BUILD_ARG( itt_sync_obj )
1795  );
1796 
1797 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1798  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1799  // we are on a fork barrier where we could not get the object reliably
1800  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1801  // cancel wait on previous parallel region...
1802  __kmp_itt_task_starting( itt_sync_obj );
1803 
1804  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1805  return;
1806 
1807  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1808  if ( itt_sync_obj != NULL )
1809  __kmp_itt_task_finished( itt_sync_obj ); // call prepare as early as possible for "new" barrier
1810 
1811  } else
1812 #endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */
1813  //
1814  // early exit for reaping threads releasing forkjoin barrier
1815  //
1816  if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1817  return;
1818 
1819  //
1820  // The worker thread may now assume that the team is valid.
1821  //
1822 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1823  // libguide only code (cannot use *itt_task* routines)
1824  if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1825  // we are on a fork barrier where we could not get the object reliably
1826  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1827  __kmp_itt_barrier_starting( gtid, itt_sync_obj ); // no need to call releasing, but we have paired calls...
1828  }
1829 #endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */
1830  team = __kmp_threads[ gtid ]-> th.th_team;
1831  KMP_DEBUG_ASSERT( team != NULL );
1832  tid = __kmp_tid_from_gtid( gtid );
1833 
1834  TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1835  KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1836  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1837 
1838  KMP_MB(); /* Flush all pending memory write invalidates. */
1839 
1840  } else { /* KMP_MASTER_TID(tid) */
1841  team = __kmp_threads[ gtid ]-> th.th_team;
1842  KMP_DEBUG_ASSERT( team != NULL );
1843 
1844  KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1845  gtid, team->t.t_id, tid, bt ) );
1846  }
1847 
1848  num_threads = this_thr -> th.th_team_nproc;
1849  other_threads = team -> t.t_threads;
1850 
1851 #ifdef KMP_REVERSE_HYPER_BAR
1852  /* count up to correct level for parent */
1853  for ( level = 0, offset = 1;
1854  offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
1855  level += branch_bits, offset <<= branch_bits );
1856 
1857  /* now go down from there */
1858  for ( level -= branch_bits, offset >>= branch_bits;
1859  offset != 0;
1860  level -= branch_bits, offset >>= branch_bits )
1861 #else
1862  /* Go down the tree, level by level */
1863  for ( level = 0, offset = 1;
1864  offset < num_threads;
1865  level += branch_bits, offset <<= branch_bits )
1866 #endif // KMP_REVERSE_HYPER_BAR
1867  {
1868 #ifdef KMP_REVERSE_HYPER_BAR
1869  /* Now go in reverse order through the children, highest to lowest.
1870  Initial setting of child is conservative here. */
1871  child = num_threads >> ((level==0)?level:level-1);
1872  for ( child = (child < branch_factor-1) ? child : branch_factor-1,
1873  child_tid = tid + (child << level);
1874  child >= 1;
1875  child--, child_tid -= (1 << level) )
1876 #else
1877  if (((tid >> level) & (branch_factor - 1)) != 0)
1878  /* No need to go any lower than this, since this is the level
1879  parent would be notified */
1880  break;
1881 
1882  /* iterate through children on this level of the tree */
1883  for ( child = 1, child_tid = tid + (1 << level);
1884  child < branch_factor && child_tid < num_threads;
1885  child++, child_tid += (1 << level) )
1886 #endif // KMP_REVERSE_HYPER_BAR
1887  {
1888  if ( child_tid >= num_threads ) continue; /* child doesn't exist so keep going */
1889  else {
1890  register kmp_info_t *child_thr = other_threads[ child_tid ];
1891  register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1892 #if KMP_CACHE_MANAGE
1893  register kmp_uint32 next_child_tid = child_tid - (1 << level);
1894  /* prefetch next thread's go count */
1895 #ifdef KMP_REVERSE_HYPER_BAR
1896  if ( child-1 >= 1 && next_child_tid < num_threads )
1897 #else
1898  if ( child+1 < branch_factor && next_child_tid < num_threads )
1899 #endif // KMP_REVERSE_HYPER_BAR
1900  KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
1901 #endif /* KMP_CACHE_MANAGE */
1902 
1903 #if KMP_BARRIER_ICV_PUSH
1904  if ( propagate_icvs ) {
1905  KMP_DEBUG_ASSERT( team != NULL );
1906  __kmp_init_implicit_task( team->t.t_ident,
1907  team->t.t_threads[child_tid], team, child_tid, FALSE );
1908  load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1909  store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1910  sync_icvs();
1911  }
1912 #endif // KMP_BARRIER_ICV_PUSH
1913 
1914  KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1915  "go(%p): %u => %u\n",
1916  gtid, team->t.t_id, tid,
1917  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1918  child_tid, &child_bar -> b_go, child_bar -> b_go,
1919  child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1920 
1921  /* release child from barrier */
1922  __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1923  }
1924  }
1925  }
1926 
1927  KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1928  gtid, team->t.t_id, tid, bt ) );
1929 }
1930 
1931 /*
1932  * Internal function to do a barrier.
1933  * If is_split is true, do a split barrier, otherwise, do a plain barrier
1934  * If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier
1935  * Returns 0 if master thread, 1 if worker thread.
1936  */
1937 int
1938 __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
1939  size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) )
1940 {
1941  register int tid = __kmp_tid_from_gtid( gtid );
1942  register kmp_info_t *this_thr = __kmp_threads[ gtid ];
1943  register kmp_team_t *team = this_thr -> th.th_team;
1944  register int status = 0;
1945 
1946  ident_t * tmp_loc = __kmp_threads[ gtid ]->th.th_ident;
1947 
1948  KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
1949  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
1950 
1951  if ( ! team->t.t_serialized ) {
1952 #if USE_ITT_BUILD
1953  // This value will be used in itt notify events below.
1954  void * itt_sync_obj = NULL;
1955  #if USE_ITT_NOTIFY
1956  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
1957  itt_sync_obj = __kmp_itt_barrier_object( gtid, bt, 1 );
1958  #endif
1959 #endif /* USE_ITT_BUILD */
1960  #if OMP_30_ENABLED
1961  if ( __kmp_tasking_mode == tskm_extra_barrier ) {
1962  __kmp_tasking_barrier( team, this_thr, gtid );
1963  KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
1964  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
1965  }
1966  #endif /* OMP_30_ENABLED */
1967 
1968  //
1969  // Copy the blocktime info to the thread, where __kmp_wait_sleep()
1970  // can access it when the team struct is not guaranteed to exist.
1971  //
1972  // See the note about the corresponding code in __kmp_join_barrier()
1973  // being performance-critical.
1974  //
1975  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
1976  #if OMP_30_ENABLED
1977  this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1978  this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1979  #else
1980  this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
1981  this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
1982  #endif // OMP_30_ENABLED
1983  }
1984 
1985 #if USE_ITT_BUILD
1986  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
1987  __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1988 #endif /* USE_ITT_BUILD */
1989 
1990  if ( reduce != NULL ) {
1991  //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1992  this_thr -> th.th_local.reduce_data = reduce_data;
1993  }
1994  if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
1995  __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
1996  USE_ITT_BUILD_ARG( itt_sync_obj )
1997  );
1998  } else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
1999  __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
2000  USE_ITT_BUILD_ARG( itt_sync_obj )
2001  );
2002  } else {
2003  __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
2004  USE_ITT_BUILD_ARG( itt_sync_obj )
2005  );
2006  }; // if
2007 
2008 #if USE_ITT_BUILD
2009  // TODO: In case of split reduction barrier, master thread may send aquired event early,
2010  // before the final summation into the shared variable is done (final summation can be a
2011  // long operation for array reductions).
2012  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2013  __kmp_itt_barrier_middle( gtid, itt_sync_obj );
2014 #endif /* USE_ITT_BUILD */
2015 
2016  KMP_MB();
2017 
2018  if ( KMP_MASTER_TID( tid ) ) {
2019  status = 0;
2020 
2021  #if OMP_30_ENABLED
2022  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2023  __kmp_task_team_wait( this_thr, team
2024  USE_ITT_BUILD_ARG( itt_sync_obj )
2025  );
2026  __kmp_task_team_setup( this_thr, team );
2027  }
2028  #endif /* OMP_30_ENABLED */
2029 
2030 
2031 #if USE_ITT_BUILD && USE_ITT_NOTIFY
2032  // Barrier - report frame end
2033  if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
2034  kmp_uint64 tmp = __itt_get_timestamp();
2035  switch( __kmp_forkjoin_frames_mode ) {
2036  case 1:
2037  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
2038  this_thr->th.th_frame_time = tmp;
2039  break;
2040  case 2:
2041  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
2042  break;
2043  case 3:
2044  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
2045  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
2046  this_thr->th.th_frame_time = tmp;
2047  break;
2048  }
2049  }
2050 #endif /* USE_ITT_BUILD */
2051  } else {
2052  status = 1;
2053  }
2054  if ( status == 1 || ! is_split ) {
2055  if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2056  __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2057  USE_ITT_BUILD_ARG( itt_sync_obj )
2058  );
2059  } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2060  __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2061  USE_ITT_BUILD_ARG( itt_sync_obj )
2062  );
2063  } else {
2064  __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2065  USE_ITT_BUILD_ARG( itt_sync_obj )
2066  );
2067  }
2068  #if OMP_30_ENABLED
2069  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2070  __kmp_task_team_sync( this_thr, team );
2071  }
2072  #endif /* OMP_30_ENABLED */
2073  }
2074 
2075 #if USE_ITT_BUILD
2076  // GEH: TODO: Move this under if-condition above and also include in __kmp_end_split_barrier().
2077  // This will more accurately represent the actual release time of the threads for split barriers.
2078  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2079  __kmp_itt_barrier_finished( gtid, itt_sync_obj );
2080 #endif /* USE_ITT_BUILD */
2081 
2082  } else { // Team is serialized.
2083 
2084  status = 0;
2085 
2086  #if OMP_30_ENABLED
2087  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2088  //
2089  // The task team should be NULL for serialized code.
2090  // (tasks will be executed immediately).
2091  //
2092  KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
2093  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
2094  }
2095  #endif /* OMP_30_ENABLED */
2096  }
2097 
2098  KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2099  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
2100  status ) );
2101  return status;
2102 }
2103 
2104 
2105 void
2106 __kmp_end_split_barrier( enum barrier_type bt, int gtid )
2107 {
2108  int tid = __kmp_tid_from_gtid( gtid );
2109  kmp_info_t *this_thr = __kmp_threads[ gtid ];
2110  kmp_team_t *team = this_thr -> th.th_team;
2111 
2112  if( ! team -> t.t_serialized ) {
2113  if( KMP_MASTER_GTID( gtid ) ) {
2114  if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2115  __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2116 #if USE_ITT_BUILD
2117  , NULL
2118 #endif /* USE_ITT_BUILD */
2119  );
2120  } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2121  __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2122 #if USE_ITT_BUILD
2123  , NULL
2124 #endif /* USE_ITT_BUILD */
2125  );
2126  } else {
2127  __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2128 #if USE_ITT_BUILD
2129  , NULL
2130 #endif /* USE_ITT_BUILD */
2131  );
2132  }; // if
2133  #if OMP_30_ENABLED
2134  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2135  __kmp_task_team_sync( this_thr, team );
2136  }; // if
2137  #endif /* OMP_30_ENABLED */
2138  }
2139  }
2140 }
2141 
2142 /* ------------------------------------------------------------------------ */
2143 /* ------------------------------------------------------------------------ */
2144 
2145 /*
2146  * determine if we can go parallel or must use a serialized parallel region and
2147  * how many threads we can use
2148  * set_nproc is the number of threads requested for the team
2149  * returns 0 if we should serialize or only use one thread,
2150  * otherwise the number of threads to use
2151  * The forkjoin lock is held by the caller.
2152  */
2153 static int
2154 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
2155  int master_tid, int set_nthreads
2156 #if OMP_40_ENABLED
2157  , int enter_teams
2158 #endif /* OMP_40_ENABLED */
2159 )
2160 {
2161  int capacity;
2162  int new_nthreads;
2163  int use_rml_to_adjust_nth;
2164  KMP_DEBUG_ASSERT( __kmp_init_serial );
2165  KMP_DEBUG_ASSERT( root && parent_team );
2166 
2167  //
2168  // Initial check to see if we should use a serialized team.
2169  //
2170  if ( set_nthreads == 1 ) {
2171  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
2172  __kmp_get_gtid(), set_nthreads ));
2173  return 1;
2174  }
2175  if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
2176 #if OMP_40_ENABLED
2177  && !enter_teams
2178 #endif /* OMP_40_ENABLED */
2179  ) ) || ( __kmp_library == library_serial ) ) {
2180  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
2181  __kmp_get_gtid(), set_nthreads ));
2182  return 1;
2183  }
2184 
2185  //
2186  // If dyn-var is set, dynamically adjust the number of desired threads,
2187  // according to the method specified by dynamic_mode.
2188  //
2189  new_nthreads = set_nthreads;
2190  use_rml_to_adjust_nth = FALSE;
2191  if ( ! get__dynamic_2( parent_team, master_tid ) ) {
2192  ;
2193  }
2194 #ifdef USE_LOAD_BALANCE
2195  else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
2196  new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
2197  if ( new_nthreads == 1 ) {
2198  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
2199  master_tid ));
2200  return 1;
2201  }
2202  if ( new_nthreads < set_nthreads ) {
2203  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
2204  master_tid, new_nthreads ));
2205  }
2206  }
2207 #endif /* USE_LOAD_BALANCE */
2208  else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
2209  new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
2210  : root->r.r_hot_team->t.t_nproc);
2211  if ( new_nthreads <= 1 ) {
2212  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
2213  master_tid ));
2214  return 1;
2215  }
2216  if ( new_nthreads < set_nthreads ) {
2217  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
2218  master_tid, new_nthreads ));
2219  }
2220  else {
2221  new_nthreads = set_nthreads;
2222  }
2223  }
2224  else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
2225  if ( set_nthreads > 2 ) {
2226  new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
2227  new_nthreads = ( new_nthreads % set_nthreads ) + 1;
2228  if ( new_nthreads == 1 ) {
2229  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
2230  master_tid ));
2231  return 1;
2232  }
2233  if ( new_nthreads < set_nthreads ) {
2234  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
2235  master_tid, new_nthreads ));
2236  }
2237  }
2238  }
2239  else {
2240  KMP_ASSERT( 0 );
2241  }
2242 
2243  //
2244  // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
2245  //
2246  if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2247  root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
2248  int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
2249  root->r.r_hot_team->t.t_nproc );
2250  if ( tl_nthreads <= 0 ) {
2251  tl_nthreads = 1;
2252  }
2253 
2254  //
2255  // If dyn-var is false, emit a 1-time warning.
2256  //
2257  if ( ! get__dynamic_2( parent_team, master_tid )
2258  && ( ! __kmp_reserve_warn ) ) {
2259  __kmp_reserve_warn = 1;
2260  __kmp_msg(
2261  kmp_ms_warning,
2262  KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
2263  KMP_HNT( Unset_ALL_THREADS ),
2264  __kmp_msg_null
2265  );
2266  }
2267  if ( tl_nthreads == 1 ) {
2268  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
2269  master_tid ));
2270  return 1;
2271  }
2272  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
2273  master_tid, tl_nthreads ));
2274  new_nthreads = tl_nthreads;
2275  }
2276 
2277 
2278  //
2279  // Check if the threads array is large enough, or needs expanding.
2280  //
2281  // See comment in __kmp_register_root() about the adjustment if
2282  // __kmp_threads[0] == NULL.
2283  //
2284  capacity = __kmp_threads_capacity;
2285  if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
2286  --capacity;
2287  }
2288  if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2289  root->r.r_hot_team->t.t_nproc ) > capacity ) {
2290  //
2291  // Expand the threads array.
2292  //
2293  int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2294  root->r.r_hot_team->t.t_nproc ) - capacity;
2295  int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
2296  if ( slotsAdded < slotsRequired ) {
2297  //
2298  // The threads array was not expanded enough.
2299  //
2300  new_nthreads -= ( slotsRequired - slotsAdded );
2301  KMP_ASSERT( new_nthreads >= 1 );
2302 
2303  //
2304  // If dyn-var is false, emit a 1-time warning.
2305  //
2306  if ( ! get__dynamic_2( parent_team, master_tid )
2307  && ( ! __kmp_reserve_warn ) ) {
2308  __kmp_reserve_warn = 1;
2309  if ( __kmp_tp_cached ) {
2310  __kmp_msg(
2311  kmp_ms_warning,
2312  KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2313  KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
2314  KMP_HNT( PossibleSystemLimitOnThreads ),
2315  __kmp_msg_null
2316  );
2317  }
2318  else {
2319  __kmp_msg(
2320  kmp_ms_warning,
2321  KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2322  KMP_HNT( SystemLimitOnThreads ),
2323  __kmp_msg_null
2324  );
2325  }
2326  }
2327  }
2328  }
2329 
2330  if ( new_nthreads == 1 ) {
2331  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
2332  __kmp_get_gtid(), set_nthreads ) );
2333  return 1;
2334  }
2335 
2336  KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
2337  __kmp_get_gtid(), new_nthreads, set_nthreads ));
2338  return new_nthreads;
2339 }
2340 
2341 /* ------------------------------------------------------------------------ */
2342 /* ------------------------------------------------------------------------ */
2343 
2344 /* allocate threads from the thread pool and assign them to the new team */
2345 /* we are assured that there are enough threads available, because we
2346  * checked on that earlier within critical section forkjoin */
2347 
2348 static void
2349 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
2350  kmp_info_t *master_th, int master_gtid )
2351 {
2352  int i;
2353 
2354  KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
2355  KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
2356  KMP_MB();
2357 
2358  /* first, let's setup the master thread */
2359  master_th -> th.th_info.ds.ds_tid = 0;
2360  master_th -> th.th_team = team;
2361  master_th -> th.th_team_nproc = team -> t.t_nproc;
2362  master_th -> th.th_team_master = master_th;
2363  master_th -> th.th_team_serialized = FALSE;
2364  master_th -> th.th_dispatch = & team -> t.t_dispatch[ 0 ];
2365 
2366  /* make sure we are not the optimized hot team */
2367  if ( team != root->r.r_hot_team ) {
2368 
2369  /* install the master thread */
2370  team -> t.t_threads[ 0 ] = master_th;
2371  __kmp_initialize_info( master_th, team, 0, master_gtid );
2372 
2373  /* now, install the worker threads */
2374  for ( i=1 ; i < team->t.t_nproc ; i++ ) {
2375 
2376  /* fork or reallocate a new thread and install it in team */
2377  team -> t.t_threads[ i ] = __kmp_allocate_thread( root, team, i );
2378  KMP_DEBUG_ASSERT( team->t.t_threads[i] );
2379  KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
2380  /* align team and thread arrived states */
2381  KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
2382  __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
2383  __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
2384  team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
2385  team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
2386 
2387  { // Initialize threads' barrier data.
2388  int b;
2389  kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
2390  for ( b = 0; b < bs_last_barrier; ++ b ) {
2391  balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
2392  }; // for b
2393  }
2394  }
2395 
2396 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
2397  __kmp_partition_places( team );
2398 #endif
2399 
2400  }
2401 
2402  KMP_MB();
2403 }
2404 
2405 static void
2406 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
2407 
2408 static void
2409 __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
2410 #if OMP_30_ENABLED
2411  kmp_internal_control_t * new_icvs,
2412  ident_t * loc
2413 #else
2414  int new_set_nproc, int new_set_dynamic, int new_set_nested,
2415  int new_set_blocktime, int new_bt_intervals, int new_bt_set
2416 #endif // OMP_30_ENABLED
2417  ); // forward declaration
2418 
2419 /* most of the work for a fork */
2420 /* return true if we really went parallel, false if serialized */
2421 int
2422 __kmp_fork_call(
2423  ident_t * loc,
2424  int gtid,
2425  int exec_master, // 0 - GNU native code, master doesn't invoke microtask
2426  // 1 - Intel code, master invokes microtask
2427  // 2 - MS native code, use special invoker
2428  kmp_int32 argc,
2429  microtask_t microtask,
2430  launch_t invoker,
2431 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
2432 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2433  va_list * ap
2434 #else
2435  va_list ap
2436 #endif
2437  )
2438 {
2439  void **argv;
2440  int i;
2441  int master_tid;
2442  int master_this_cons;
2443  int master_last_cons;
2444  kmp_team_t *team;
2445  kmp_team_t *parent_team;
2446  kmp_info_t *master_th;
2447  kmp_root_t *root;
2448  int nthreads;
2449  int master_active;
2450  int master_set_numthreads;
2451  int level;
2452 #if OMP_40_ENABLED
2453  int teams_level;
2454 #endif
2455 
2456  KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
2457 
2458  /* initialize if needed */
2459  KMP_DEBUG_ASSERT( __kmp_init_serial );
2460  if( ! TCR_4(__kmp_init_parallel) )
2461  __kmp_parallel_initialize();
2462 
2463  /* setup current data */
2464  master_th = __kmp_threads[ gtid ];
2465  parent_team = master_th -> th.th_team;
2466  master_tid = master_th -> th.th_info.ds.ds_tid;
2467  master_this_cons = master_th -> th.th_local.this_construct;
2468  master_last_cons = master_th -> th.th_local.last_construct;
2469  root = master_th -> th.th_root;
2470  master_active = root -> r.r_active;
2471  master_set_numthreads = master_th -> th.th_set_nproc;
2472 #if OMP_30_ENABLED
2473  // Nested level will be an index in the nested nthreads array
2474  level = parent_team->t.t_level;
2475 #endif // OMP_30_ENABLED
2476 #if OMP_40_ENABLED
2477  teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams
2478 #endif
2479 
2480 
2481  master_th->th.th_ident = loc;
2482 
2483 #if OMP_40_ENABLED
2484  if ( master_th->th.th_team_microtask &&
2485  ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
2486  // AC: This is start of parallel that is nested inside teams construct.
2487  // The team is actual (hot), all workers are ready at the fork barrier.
2488  // No lock needed to initialize the team a bit, then free workers.
2489  parent_team->t.t_ident = loc;
2490  parent_team->t.t_argc = argc;
2491  argv = (void**)parent_team->t.t_argv;
2492  for( i=argc-1; i >= 0; --i )
2493 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
2494 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2495  *argv++ = va_arg( *ap, void * );
2496 #else
2497  *argv++ = va_arg( ap, void * );
2498 #endif
2499  /* Increment our nested depth levels, but not increase the serialization */
2500  if ( parent_team == master_th->th.th_serial_team ) {
2501  // AC: we are in serialized parallel
2502  __kmpc_serialized_parallel(loc, gtid);
2503  KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
2504  parent_team->t.t_serialized--; // AC: need this in order enquiry functions
2505  // work correctly, will restore at join time
2506  __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
2507  return TRUE;
2508  }
2509  parent_team->t.t_pkfn = microtask;
2510  parent_team->t.t_invoke = invoker;
2511  KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2512  parent_team->t.t_active_level ++;
2513  parent_team->t.t_level ++;
2514 
2515  /* Change number of threads in the team if requested */
2516  if ( master_set_numthreads ) { // The parallel has num_threads clause
2517  if ( master_set_numthreads < master_th->th.th_set_nth_teams ) {
2518  // AC: only can reduce the number of threads dynamically, cannot increase
2519  kmp_info_t **other_threads = parent_team->t.t_threads;
2520  parent_team->t.t_nproc = master_set_numthreads;
2521  for ( i = 0; i < master_set_numthreads; ++i ) {
2522  other_threads[i]->th.th_team_nproc = master_set_numthreads;
2523  }
2524  // Keep extra threads hot in the team for possible next parallels
2525  }
2526  master_th->th.th_set_nproc = 0;
2527  }
2528 
2529 
2530  KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2531  __kmp_internal_fork( loc, gtid, parent_team );
2532  KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2533 
2534  /* Invoke microtask for MASTER thread */
2535  KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2536  gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
2537 
2538  if (! parent_team->t.t_invoke( gtid )) {
2539  KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2540  }
2541  KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2542  gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
2543  KMP_MB(); /* Flush all pending memory write invalidates. */
2544 
2545  KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2546 
2547  return TRUE;
2548  }
2549 #endif /* OMP_40_ENABLED */
2550 
2551 #if OMP_30_ENABLED && KMP_DEBUG
2552  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2553  KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2554  }
2555 #endif // OMP_30_ENABLED
2556 
2557  /* determine how many new threads we can use */
2558  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2559 
2560 #if OMP_30_ENABLED
2561  if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
2562  nthreads = 1;
2563  }
2564  else
2565 #endif // OMP_30_ENABLED
2566 
2567  {
2568  nthreads = master_set_numthreads ?
2569  master_set_numthreads : get__nproc_2( parent_team, master_tid );
2570  nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
2571 #if OMP_40_ENABLED
2572  // AC: If we execute teams from parallel region (on host), then teams
2573  // should be created but each can only have 1 thread if nesting is disabled.
2574  // If teams called from serial region, then teams and their threads
2575  // should be created regardless of the nesting setting.
2576  ,( ( ap == NULL && teams_level == 0 ) ||
2577  ( ap && teams_level > 0 && teams_level == level ) )
2578 #endif /* OMP_40_ENABLED */
2579  );
2580  }
2581  KMP_DEBUG_ASSERT( nthreads > 0 );
2582 
2583  /* If we temporarily changed the set number of threads then restore it now */
2584  master_th -> th.th_set_nproc = 0;
2585 
2586 
2587  /* create a serialized parallel region? */
2588  if ( nthreads == 1 ) {
2589  /* josh todo: hypothetical question: what do we do for OS X*? */
2590 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
2591  void * args[ argc ];
2592 #else
2593  void * * args = (void**) alloca( argc * sizeof( void * ) );
2594 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) */
2595 
2596  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2597  KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
2598 
2599  __kmpc_serialized_parallel(loc, gtid);
2600 
2601  if ( exec_master == 0 ) {
2602  // we were called from GNU native code
2603  KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
2604  return FALSE;
2605  } else if ( exec_master == 1 ) {
2606  /* TODO this sucks, use the compiler itself to pass args! :) */
2607  master_th -> th.th_serial_team -> t.t_ident = loc;
2608 #if OMP_40_ENABLED
2609  if ( !ap ) {
2610  // revert change made in __kmpc_serialized_parallel()
2611  master_th -> th.th_serial_team -> t.t_level--;
2612  // Get args from parent team for teams construct
2613  __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
2614  } else if ( microtask == (microtask_t)__kmp_teams_master ) {
2615  KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
2616  team = master_th->th.th_team;
2617  //team->t.t_pkfn = microtask;
2618  team->t.t_invoke = invoker;
2619  __kmp_alloc_argv_entries( argc, team, TRUE );
2620  team->t.t_argc = argc;
2621  argv = (void**) team->t.t_argv;
2622  if ( ap ) {
2623  for( i=argc-1; i >= 0; --i )
2624  /* TODO: revert workaround for Intel(R) 64 tracker #96 */
2625  #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2626  *argv++ = va_arg( *ap, void * );
2627  #else
2628  *argv++ = va_arg( ap, void * );
2629  #endif
2630  } else {
2631  for( i=0; i < argc; ++i )
2632  // Get args from parent team for teams construct
2633  argv[i] = parent_team->t.t_argv[i];
2634  }
2635  // AC: revert change made in __kmpc_serialized_parallel()
2636  // because initial code in teams should have level=0
2637  team->t.t_level--;
2638  // AC: call special invoker for outer "parallel" of the teams construct
2639  invoker(gtid);
2640  } else {
2641 #endif /* OMP_40_ENABLED */
2642  argv = args;
2643  for( i=argc-1; i >= 0; --i )
2644  /* TODO: revert workaround for Intel(R) 64 tracker #96 */
2645  #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2646  *argv++ = va_arg( *ap, void * );
2647  #else
2648  *argv++ = va_arg( ap, void * );
2649  #endif
2650  KMP_MB();
2651  __kmp_invoke_microtask( microtask, gtid, 0, argc, args );
2652 #if OMP_40_ENABLED
2653  }
2654 #endif /* OMP_40_ENABLED */
2655  }
2656  else {
2657  KMP_ASSERT2( exec_master <= 1, "__kmp_fork_call: unknown parameter exec_master" );
2658  }
2659 
2660  KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
2661 
2662  KMP_MB();
2663  return FALSE;
2664  }
2665 
2666 #if OMP_30_ENABLED
2667  // GEH: only modify the executing flag in the case when not serialized
2668  // serialized case is handled in kmpc_serialized_parallel
2669  KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
2670  parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
2671  master_th->th.th_current_task->td_icvs.max_active_levels ) );
2672  // TODO: GEH - cannot do this assertion because root thread not set up as executing
2673  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
2674  master_th->th.th_current_task->td_flags.executing = 0;
2675 #endif
2676 
2677 #if OMP_40_ENABLED
2678  if ( !master_th->th.th_team_microtask || level > teams_level )
2679 #endif /* OMP_40_ENABLED */
2680  {
2681  /* Increment our nested depth level */
2682  KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2683  }
2684 
2685 #if OMP_30_ENABLED
2686  //
2687  // See if we need to make a copy of the ICVs.
2688  //
2689  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2690  if ( ( level + 1 < __kmp_nested_nth.used ) &&
2691  ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
2692  nthreads_icv = __kmp_nested_nth.nth[level + 1];
2693  }
2694  else {
2695  nthreads_icv = 0; // don't update
2696  }
2697 
2698 #if OMP_40_ENABLED
2699  //
2700  // Figure out the proc_bind_policy for the new team.
2701  //
2702  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2703  kmp_proc_bind_t proc_bind_icv; // proc_bind_default means don't update
2704 
2705  if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
2706  proc_bind = proc_bind_false;
2707  proc_bind_icv = proc_bind_default;
2708  }
2709  else {
2710  proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
2711  if ( proc_bind == proc_bind_default ) {
2712  //
2713  // No proc_bind clause was specified, so use the current value
2714  // of proc-bind-var for this parallel region.
2715  //
2716  proc_bind = proc_bind_icv;
2717  }
2718  else {
2719  //
2720  // The proc_bind policy was specified explicitly on the parallel
2721  // clause. This overrides the proc-bind-var for this parallel
2722  // region, but does not change proc-bind-var.
2723  //
2724  }
2725 
2726  //
2727  // Figure the value of proc-bind-var for the child threads.
2728  //
2729  if ( ( level + 1 < __kmp_nested_proc_bind.used )
2730  && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
2731  proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2732  }
2733  else {
2734  proc_bind_icv = proc_bind_default;
2735  }
2736  }
2737 
2738  //
2739  // Reset for next parallel region
2740  //
2741  master_th->th.th_set_proc_bind = proc_bind_default;
2742 #endif /* OMP_40_ENABLED */
2743 
2744  if ( ( nthreads_icv > 0 )
2745 #if OMP_40_ENABLED
2746  || ( proc_bind_icv != proc_bind_default )
2747 #endif /* OMP_40_ENABLED */
2748  )
2749  {
2750  kmp_internal_control_t new_icvs;
2751  copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
2752  new_icvs.next = NULL;
2753 
2754  if ( nthreads_icv > 0 ) {
2755  new_icvs.nproc = nthreads_icv;
2756  }
2757 
2758 #if OMP_40_ENABLED
2759  if ( proc_bind_icv != proc_bind_default ) {
2760  new_icvs.proc_bind = proc_bind_icv;
2761  }
2762 #endif /* OMP_40_ENABLED */
2763 
2764  /* allocate a new parallel team */
2765  KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
2766  team = __kmp_allocate_team(root, nthreads, nthreads,
2767 #if OMP_40_ENABLED
2768  proc_bind,
2769 #endif
2770  &new_icvs, argc );
2771  } else
2772 #endif /* OMP_30_ENABLED */
2773  {
2774  /* allocate a new parallel team */
2775  KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
2776  team = __kmp_allocate_team(root, nthreads, nthreads,
2777 #if OMP_40_ENABLED
2778  proc_bind,
2779 #endif
2780 #if OMP_30_ENABLED
2781  &master_th->th.th_current_task->td_icvs,
2782 #else
2783  parent_team->t.t_set_nproc[master_tid],
2784  parent_team->t.t_set_dynamic[master_tid],
2785  parent_team->t.t_set_nested[master_tid],
2786  parent_team->t.t_set_blocktime[master_tid],
2787  parent_team->t.t_set_bt_intervals[master_tid],
2788  parent_team->t.t_set_bt_set[master_tid],
2789 #endif // OMP_30_ENABLED
2790  argc );
2791  }
2792 
2793  KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n",
2794  team ) );
2795 
2796  /* setup the new team */
2797  team->t.t_master_tid = master_tid;
2798  team->t.t_master_this_cons = master_this_cons;
2799  team->t.t_master_last_cons = master_last_cons;
2800 
2801  team->t.t_parent = parent_team;
2802  TCW_SYNC_PTR(team->t.t_pkfn, microtask);
2803  team->t.t_invoke = invoker; /* TODO move this to root, maybe */
2804  team->t.t_ident = loc;
2805 #if OMP_30_ENABLED
2806  // TODO: parent_team->t.t_level == INT_MAX ???
2807 #if OMP_40_ENABLED
2808  if ( !master_th->th.th_team_microtask || level > teams_level ) {
2809 #endif /* OMP_40_ENABLED */
2810  team->t.t_level = parent_team->t.t_level + 1;
2811  team->t.t_active_level = parent_team->t.t_active_level + 1;
2812 #if OMP_40_ENABLED
2813  } else {
2814  // AC: Do not increase parallel level at start of the teams construct
2815  team->t.t_level = parent_team->t.t_level;
2816  team->t.t_active_level = parent_team->t.t_active_level;
2817  }
2818 #endif /* OMP_40_ENABLED */
2819  team->t.t_sched = get__sched_2( parent_team, master_tid ); // set master's schedule as new run-time schedule
2820 
2821 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2822  if ( __kmp_inherit_fp_control ) {
2823  __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
2824  __kmp_store_mxcsr( &team->t.t_mxcsr );
2825  team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
2826  team->t.t_fp_control_saved = TRUE;
2827  }
2828  else {
2829  team->t.t_fp_control_saved = FALSE;
2830  }
2831 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2832 
2833  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2834  //
2835  // Set the master thread's task team to the team's task team.
2836  // Unless this is the hot team, it should be NULL.
2837  //
2838  KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2839  KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
2840  __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
2841  parent_team, team->t.t_task_team, team ) );
2842  master_th->th.th_task_team = team->t.t_task_team;
2843  KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) ) ;
2844  }
2845 #endif // OMP_30_ENABLED
2846 
2847  KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2848  gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2849  KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2850  ( team->t.t_master_tid == 0 &&
2851  ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2852  KMP_MB();
2853 
2854  /* now, setup the arguments */
2855  argv = (void**) team -> t.t_argv;
2856 #if OMP_40_ENABLED
2857  if ( ap ) {
2858 #endif /* OMP_40_ENABLED */
2859  for( i=argc-1; i >= 0; --i )
2860 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
2861 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2862  *argv++ = va_arg( *ap, void * );
2863 #else
2864  *argv++ = va_arg( ap, void * );
2865 #endif
2866 #if OMP_40_ENABLED
2867  } else {
2868  for( i=0; i < argc; ++i )
2869  // Get args from parent team for teams construct
2870  argv[i] = team->t.t_parent->t.t_argv[i];
2871  }
2872 #endif /* OMP_40_ENABLED */
2873 
2874  /* now actually fork the threads */
2875 
2876  team->t.t_master_active = master_active;
2877  if (!root -> r.r_active) /* Only do the assignment if it makes a difference to prevent cache ping-pong */
2878  root -> r.r_active = TRUE;
2879 
2880  __kmp_fork_team_threads( root, team, master_th, gtid );
2881  __kmp_setup_icv_copy(team, nthreads
2882 #if OMP_30_ENABLED
2883  , &master_th->th.th_current_task->td_icvs, loc
2884 #else
2885  , parent_team->t.t_set_nproc[master_tid],
2886  parent_team->t.t_set_dynamic[master_tid],
2887  parent_team->t.t_set_nested[master_tid],
2888  parent_team->t.t_set_blocktime[master_tid],
2889  parent_team->t.t_set_bt_intervals[master_tid],
2890  parent_team->t.t_set_bt_set[master_tid]
2891 #endif /* OMP_30_ENABLED */
2892  );
2893 
2894 
2895  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2896 
2897 
2898 #if USE_ITT_BUILD
2899  // Mark start of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
2900  if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
2901 # if OMP_40_ENABLED
2902  if ( !master_th->th.th_team_microtask || microtask == (microtask_t)__kmp_teams_master )
2903  // Either not in teams or the outer fork of the teams construct
2904 # endif /* OMP_40_ENABLED */
2905  __kmp_itt_region_forking( gtid );
2906 #endif /* USE_ITT_BUILD */
2907 
2908 #if USE_ITT_BUILD && USE_ITT_NOTIFY && OMP_30_ENABLED
2909  // Internal fork - report frame begin
2910  if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
2911  {
2912  if( ! ( team->t.t_active_level > 1 ) ) {
2913  master_th->th.th_frame_time = __itt_get_timestamp();
2914  }
2915  }
2916 #endif /* USE_ITT_BUILD */
2917 
2918  /* now go on and do the work */
2919  KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2920  KMP_MB();
2921 
2922  KF_TRACE( 10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
2923 
2924 #if USE_ITT_BUILD
2925  if ( __itt_stack_caller_create_ptr ) {
2926  team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier
2927  }
2928 #endif /* USE_ITT_BUILD */
2929 
2930 #if OMP_40_ENABLED
2931  if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute
2932 #endif /* OMP_40_ENABLED */
2933  {
2934  __kmp_internal_fork( loc, gtid, team );
2935  KF_TRACE( 10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
2936  }
2937 
2938  if (! exec_master) {
2939  KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2940  return TRUE;
2941  }
2942 
2943  /* Invoke microtask for MASTER thread */
2944  KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2945  gtid, team->t.t_id, team->t.t_pkfn ) );
2946 
2947  if (! team->t.t_invoke( gtid )) {
2948  KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2949  }
2950  KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2951  gtid, team->t.t_id, team->t.t_pkfn ) );
2952  KMP_MB(); /* Flush all pending memory write invalidates. */
2953 
2954  KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
2955 
2956  return TRUE;
2957 }
2958 
2959 
2960 void
2961 __kmp_join_call(ident_t *loc, int gtid
2962 #if OMP_40_ENABLED
2963  , int exit_teams
2964 #endif /* OMP_40_ENABLED */
2965 )
2966 {
2967  kmp_team_t *team;
2968  kmp_team_t *parent_team;
2969  kmp_info_t *master_th;
2970  kmp_root_t *root;
2971  int master_active;
2972  int i;
2973 
2974  KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
2975 
2976  /* setup current data */
2977  master_th = __kmp_threads[ gtid ];
2978  root = master_th -> th.th_root;
2979  team = master_th -> th.th_team;
2980  parent_team = team->t.t_parent;
2981 
2982  master_th->th.th_ident = loc;
2983 
2984 #if OMP_30_ENABLED && KMP_DEBUG
2985  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2986  KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2987  __kmp_gtid_from_thread( master_th ), team,
2988  team -> t.t_task_team, master_th->th.th_task_team) );
2989  KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
2990  }
2991 #endif // OMP_30_ENABLED
2992 
2993  if( team->t.t_serialized ) {
2994 #if OMP_40_ENABLED
2995  if ( master_th->th.th_team_microtask ) {
2996  // We are in teams construct
2997  int level = team->t.t_level;
2998  int tlevel = master_th->th.th_teams_level;
2999  if ( level == tlevel ) {
3000  // AC: we haven't incremented it earlier at start of teams construct,
3001  // so do it here - at the end of teams construct
3002  team->t.t_level++;
3003  } else if ( level == tlevel + 1 ) {
3004  // AC: we are exiting parallel inside teams, need to increment serialization
3005  // in order to restore it in the next call to __kmpc_end_serialized_parallel
3006  team->t.t_serialized++;
3007  }
3008  }
3009 #endif /* OMP_40_ENABLED */
3010  __kmpc_end_serialized_parallel( loc, gtid );
3011  return;
3012  }
3013 
3014  master_active = team->t.t_master_active;
3015 
3016 #if OMP_40_ENABLED
3017  if (!exit_teams)
3018 #endif /* OMP_40_ENABLED */
3019  {
3020  // AC: No barrier for internal teams at exit from teams construct.
3021  // But there is barrier for external team (league).
3022  __kmp_internal_join( loc, gtid, team );
3023  }
3024  KMP_MB();
3025 
3026 #if USE_ITT_BUILD
3027  if ( __itt_stack_caller_create_ptr ) {
3028  __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
3029  }
3030 
3031  // Mark end of "parallel" region for VTune. Only use one of frame notification scheme at the moment.
3032  if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
3033 # if OMP_40_ENABLED
3034  if ( !master_th->th.th_team_microtask /* not in teams */ ||
3035  ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
3036  // Either not in teams or exiting teams region
3037  // (teams is a frame and no other frames inside the teams)
3038 # endif /* OMP_40_ENABLED */
3039  {
3040  master_th->th.th_ident = loc;
3041  __kmp_itt_region_joined( gtid );
3042  }
3043 #endif /* USE_ITT_BUILD */
3044 
3045 #if OMP_40_ENABLED
3046  if ( master_th->th.th_team_microtask &&
3047  !exit_teams &&
3048  team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
3049  team->t.t_level == master_th->th.th_teams_level + 1 ) {
3050  // AC: We need to leave the team structure intact at the end
3051  // of parallel inside the teams construct, so that at the next
3052  // parallel same (hot) team works, only adjust nesting levels
3053 
3054  /* Decrement our nested depth level */
3055  team->t.t_level --;
3056  team->t.t_active_level --;
3057  KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
3058 
3059  /* Restore number of threads in the team if needed */
3060  if ( master_th->th.th_team_nproc < master_th->th.th_set_nth_teams ) {
3061  int old_num = master_th->th.th_team_nproc;
3062  int new_num = master_th->th.th_set_nth_teams;
3063  kmp_info_t **other_threads = team->t.t_threads;
3064  team->t.t_nproc = new_num;
3065  for ( i = 0; i < old_num; ++i ) {
3066  other_threads[i]->th.th_team_nproc = new_num;
3067  }
3068  // Adjust states of non-used threads of the team
3069  for ( i = old_num; i < new_num; ++i ) {
3070  // Re-initialize thread's barrier data.
3071  int b;
3072  kmp_balign_t * balign = other_threads[i]->th.th_bar;
3073  for ( b = 0; b < bp_last_bar; ++ b ) {
3074  balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
3075  }
3076  // Synchronize thread's task state
3077  other_threads[i]->th.th_task_state = master_th->th.th_task_state;
3078  }
3079  }
3080  return;
3081  }
3082 #endif /* OMP_40_ENABLED */
3083  /* do cleanup and restore the parent team */
3084  master_th -> th.th_info .ds.ds_tid = team -> t.t_master_tid;
3085  master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
3086  master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;
3087 
3088  master_th -> th.th_dispatch =
3089  & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
3090 
3091  /* jc: The following lock has instructions with REL and ACQ semantics,
3092  separating the parallel user code called in this parallel region
3093  from the serial user code called after this function returns.
3094  */
3095  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3096 
3097 #if OMP_40_ENABLED
3098  if ( !master_th->th.th_team_microtask || team->t.t_level > master_th->th.th_teams_level )
3099 #endif /* OMP_40_ENABLED */
3100  {
3101  /* Decrement our nested depth level */
3102  KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
3103  }
3104  KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
3105 
3106  #if OMP_30_ENABLED
3107  KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
3108  0, master_th, team ) );
3109  __kmp_pop_current_task_from_thread( master_th );
3110  #endif // OMP_30_ENABLED
3111 
3112 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
3113  //
3114  // Restore master thread's partition.
3115  //
3116  master_th -> th.th_first_place = team -> t.t_first_place;
3117  master_th -> th.th_last_place = team -> t.t_last_place;
3118 #endif /* OMP_40_ENABLED */
3119 
3120 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3121  if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
3122  __kmp_clear_x87_fpu_status_word();
3123  __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
3124  __kmp_load_mxcsr( &team->t.t_mxcsr );
3125  }
3126 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3127 
3128  if ( root -> r.r_active != master_active )
3129  root -> r.r_active = master_active;
3130 
3131  __kmp_free_team( root, team ); /* this will free worker threads */
3132 
3133  /* this race was fun to find. make sure the following is in the critical
3134  * region otherwise assertions may fail occasiounally since the old team
3135  * may be reallocated and the hierarchy appears inconsistent. it is
3136  * actually safe to run and won't cause any bugs, but will cause thoose
3137  * assertion failures. it's only one deref&assign so might as well put this
3138  * in the critical region */
3139  master_th -> th.th_team = parent_team;
3140  master_th -> th.th_team_nproc = parent_team -> t.t_nproc;
3141  master_th -> th.th_team_master = parent_team -> t.t_threads[0];
3142  master_th -> th.th_team_serialized = parent_team -> t.t_serialized;
3143 
3144  /* restore serialized team, if need be */
3145  if( parent_team -> t.t_serialized &&
3146  parent_team != master_th->th.th_serial_team &&
3147  parent_team != root->r.r_root_team ) {
3148  __kmp_free_team( root, master_th -> th.th_serial_team );
3149  master_th -> th.th_serial_team = parent_team;
3150  }
3151 
3152 #if OMP_30_ENABLED
3153  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3154  //
3155  // Copy the task team from the new child / old parent team
3156  // to the thread. If non-NULL, copy the state flag also.
3157  //
3158  if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
3159  master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
3160  }
3161  KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
3162  __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
3163  parent_team ) );
3164  }
3165 #endif /* OMP_30_ENABLED */
3166 
3167  #if OMP_30_ENABLED
3168  // TODO: GEH - cannot do this assertion because root thread not set up as executing
3169  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
3170  master_th->th.th_current_task->td_flags.executing = 1;
3171  #endif // OMP_30_ENABLED
3172 
3173  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3174 
3175  KMP_MB();
3176  KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
3177 }
3178 
3179 /* ------------------------------------------------------------------------ */
3180 /* ------------------------------------------------------------------------ */
3181 
3182 /* Check whether we should push an internal control record onto the
3183  serial team stack. If so, do it. */
3184 void
3185 __kmp_save_internal_controls ( kmp_info_t * thread )
3186 {
3187 
3188  if ( thread -> th.th_team != thread -> th.th_serial_team ) {
3189  return;
3190  }
3191  if (thread -> th.th_team -> t.t_serialized > 1) {
3192  int push = 0;
3193 
3194  if (thread -> th.th_team -> t.t_control_stack_top == NULL) {
3195  push = 1;
3196  } else {
3197  if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
3198  thread -> th.th_team -> t.t_serialized ) {
3199  push = 1;
3200  }
3201  }
3202  if (push) { /* push a record on the serial team's stack */
3203  kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
3204 
3205 #if OMP_30_ENABLED
3206  copy_icvs( control, & thread->th.th_current_task->td_icvs );
3207 #else
3208  control->nproc = thread->th.th_team->t.t_set_nproc[0];
3209  control->dynamic = thread->th.th_team->t.t_set_dynamic[0];
3210  control->nested = thread->th.th_team->t.t_set_nested[0];
3211  control->blocktime = thread->th.th_team->t.t_set_blocktime[0];
3212  control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
3213  control->bt_set = thread->th.th_team->t.t_set_bt_set[0];
3214 #endif // OMP_30_ENABLED
3215 
3216  control->serial_nesting_level = thread->th.th_team->t.t_serialized;
3217 
3218  control->next = thread -> th.th_team -> t.t_control_stack_top;
3219  thread -> th.th_team -> t.t_control_stack_top = control;
3220  }
3221  }
3222 }
3223 
3224 /* Changes set_nproc */
3225 void
3226 __kmp_set_num_threads( int new_nth, int gtid )
3227 {
3228  kmp_info_t *thread;
3229  kmp_root_t *root;
3230 
3231  KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
3232  KMP_DEBUG_ASSERT( __kmp_init_serial );
3233 
3234  if (new_nth < 1)
3235  new_nth = 1;
3236  else if (new_nth > __kmp_max_nth)
3237  new_nth = __kmp_max_nth;
3238 
3239  thread = __kmp_threads[gtid];
3240 
3241  __kmp_save_internal_controls( thread );
3242 
3243  set__nproc( thread, new_nth );
3244 
3245  //
3246  // If this omp_set_num_threads() call will cause the hot team size to be
3247  // reduced (in the absence of a num_threads clause), then reduce it now,
3248  // rather than waiting for the next parallel region.
3249  //
3250  root = thread->th.th_root;
3251  if ( __kmp_init_parallel && ( ! root->r.r_active )
3252  && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
3253  kmp_team_t *hot_team = root->r.r_hot_team;
3254  int f;
3255 
3256  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3257 
3258 
3259 #if OMP_30_ENABLED
3260  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3261  kmp_task_team_t *task_team = hot_team->t.t_task_team;
3262  if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
3263  //
3264  // Signal the worker threads (esp. the extra ones) to stop
3265  // looking for tasks while spin waiting. The task teams
3266  // are reference counted and will be deallocated by the
3267  // last worker thread.
3268  //
3269  KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
3270  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
3271  KMP_MB();
3272 
3273  KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
3274  &hot_team->t.t_task_team ) );
3275  hot_team->t.t_task_team = NULL;
3276  }
3277  else {
3278  KMP_DEBUG_ASSERT( task_team == NULL );
3279  }
3280  }
3281 #endif // OMP_30_ENABLED
3282 
3283  //
3284  // Release the extra threads we don't need any more.
3285  //
3286  for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
3287  KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3288  __kmp_free_thread( hot_team->t.t_threads[f] );
3289  hot_team->t.t_threads[f] = NULL;
3290  }
3291  hot_team->t.t_nproc = new_nth;
3292 
3293 
3294  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3295 
3296  //
3297  // Update the t_nproc field in the threads that are still active.
3298  //
3299  for( f=0 ; f < new_nth; f++ ) {
3300  KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3301  hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
3302  }
3303 #if KMP_MIC
3304  // Special flag in case omp_set_num_threads() call
3305  hot_team -> t.t_size_changed = -1;
3306 #endif
3307  }
3308 
3309 }
3310 
3311 #if OMP_30_ENABLED
3312 /* Changes max_active_levels */
3313 void
3314 __kmp_set_max_active_levels( int gtid, int max_active_levels )
3315 {
3316  kmp_info_t *thread;
3317 
3318  KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3319  KMP_DEBUG_ASSERT( __kmp_init_serial );
3320 
3321  // validate max_active_levels
3322  if( max_active_levels < 0 ) {
3323  KMP_WARNING( ActiveLevelsNegative, max_active_levels );
3324  // We ignore this call if the user has specified a negative value.
3325  // The current setting won't be changed. The last valid setting will be used.
3326  // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
3327  KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3328  return;
3329  }
3330  if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
3331  // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
3332  // We allow a zero value. (implementation defined behavior)
3333  } else {
3334  KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
3335  max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
3336  // Current upper limit is MAX_INT. (implementation defined behavior)
3337  // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
3338  // Actually, the flow should never get here until we use MAX_INT limit.
3339  }
3340  KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3341 
3342  thread = __kmp_threads[ gtid ];
3343 
3344  __kmp_save_internal_controls( thread );
3345 
3346  set__max_active_levels( thread, max_active_levels );
3347 
3348 }
3349 
3350 /* Gets max_active_levels */
3351 int
3352 __kmp_get_max_active_levels( int gtid )
3353 {
3354  kmp_info_t *thread;
3355 
3356  KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
3357  KMP_DEBUG_ASSERT( __kmp_init_serial );
3358 
3359  thread = __kmp_threads[ gtid ];
3360  KMP_DEBUG_ASSERT( thread -> th.th_current_task );
3361  KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
3362  gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
3363  return thread -> th.th_current_task -> td_icvs.max_active_levels;
3364 }
3365 
3366 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
3367 void
3368 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
3369 {
3370  kmp_info_t *thread;
3371 // kmp_team_t *team;
3372 
3373  KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
3374  KMP_DEBUG_ASSERT( __kmp_init_serial );
3375 
3376  // Check if the kind parameter is valid, correct if needed.
3377  // Valid parameters should fit in one of two intervals - standard or extended:
3378  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
3379  // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
3380  if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
3381  ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
3382  {
3383  // TODO: Hint needs attention in case we change the default schedule.
3384  __kmp_msg(
3385  kmp_ms_warning,
3386  KMP_MSG( ScheduleKindOutOfRange, kind ),
3387  KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
3388  __kmp_msg_null
3389  );
3390  kind = kmp_sched_default;
3391  chunk = 0; // ignore chunk value in case of bad kind
3392  }
3393 
3394  thread = __kmp_threads[ gtid ];
3395 
3396  __kmp_save_internal_controls( thread );
3397 
3398  if ( kind < kmp_sched_upper_std ) {
3399  if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
3400  // differ static chunked vs. unchunked:
3401  // chunk should be invalid to indicate unchunked schedule (which is the default)
3402  thread -> th.th_current_task -> td_icvs.sched.r_sched_type = kmp_sch_static;
3403  } else {
3404  thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
3405  }
3406  } else {
3407  // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
3408  thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
3409  __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
3410  }
3411  if ( kind == kmp_sched_auto ) {
3412  // ignore parameter chunk for schedule auto
3413  thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
3414  } else {
3415  thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
3416  }
3417 }
3418 
3419 /* Gets def_sched_var ICV values */
3420 void
3421 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
3422 {
3423  kmp_info_t *thread;
3424  enum sched_type th_type;
3425  int i;
3426 
3427  KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
3428  KMP_DEBUG_ASSERT( __kmp_init_serial );
3429 
3430  thread = __kmp_threads[ gtid ];
3431 
3432  //th_type = thread -> th.th_team -> t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type;
3433  th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;
3434 
3435  switch ( th_type ) {
3436  case kmp_sch_static:
3437  case kmp_sch_static_greedy:
3438  case kmp_sch_static_balanced:
3439  *kind = kmp_sched_static;
3440  *chunk = 0; // chunk was not set, try to show this fact via zero value
3441  return;
3442  case kmp_sch_static_chunked:
3443  *kind = kmp_sched_static;
3444  break;
3445  case kmp_sch_dynamic_chunked:
3446  *kind = kmp_sched_dynamic;
3447  break;
3449  case kmp_sch_guided_iterative_chunked:
3450  case kmp_sch_guided_analytical_chunked:
3451  *kind = kmp_sched_guided;
3452  break;
3453  case kmp_sch_auto:
3454  *kind = kmp_sched_auto;
3455  break;
3456  case kmp_sch_trapezoidal:
3457  *kind = kmp_sched_trapezoidal;
3458  break;
3459 /*
3460  case kmp_sch_static_steal:
3461  *kind = kmp_sched_static_steal;
3462  break;
3463 */
3464  default:
3465  KMP_FATAL( UnknownSchedulingType, th_type );
3466  }
3467 
3468  //*chunk = thread -> th.th_team -> t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk;
3469  *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
3470 }
3471 
3472 int
3473 __kmp_get_ancestor_thread_num( int gtid, int level ) {
3474 
3475  int ii, dd;
3476  kmp_team_t *team;
3477  kmp_info_t *thr;
3478 
3479  KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
3480  KMP_DEBUG_ASSERT( __kmp_init_serial );
3481 
3482  // validate level
3483  if( level == 0 ) return 0;
3484  if( level < 0 ) return -1;
3485  thr = __kmp_threads[ gtid ];
3486  team = thr->th.th_team;
3487  ii = team -> t.t_level;
3488  if( level > ii ) return -1;
3489 
3490 #if OMP_40_ENABLED
3491  if( thr->th.th_team_microtask ) {
3492  // AC: we are in teams region where multiple nested teams have same level
3493  int tlevel = thr->th.th_teams_level; // the level of the teams construct
3494  if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
3495  KMP_DEBUG_ASSERT( ii >= tlevel );
3496  // AC: As we need to pass by the teams league, we need to artificially increase ii
3497  if ( ii == tlevel ) {
3498  ii += 2; // three teams have same level
3499  } else {
3500  ii ++; // two teams have same level
3501  }
3502  }
3503  }
3504 #endif
3505 
3506  if( ii == level ) return __kmp_tid_from_gtid( gtid );
3507 
3508  dd = team -> t.t_serialized;
3509  level++;
3510  while( ii > level )
3511  {
3512  for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3513  {
3514  }
3515  if( ( team -> t.t_serialized ) && ( !dd ) ) {
3516  team = team->t.t_parent;
3517  continue;
3518  }
3519  if( ii > level ) {
3520  team = team->t.t_parent;
3521  dd = team -> t.t_serialized;
3522  ii--;
3523  }
3524  }
3525 
3526  return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
3527 }
3528 
3529 int
3530 __kmp_get_team_size( int gtid, int level ) {
3531 
3532  int ii, dd;
3533  kmp_team_t *team;
3534  kmp_info_t *thr;
3535 
3536  KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
3537  KMP_DEBUG_ASSERT( __kmp_init_serial );
3538 
3539  // validate level
3540  if( level == 0 ) return 1;
3541  if( level < 0 ) return -1;
3542  thr = __kmp_threads[ gtid ];
3543  team = thr->th.th_team;
3544  ii = team -> t.t_level;
3545  if( level > ii ) return -1;
3546 
3547 #if OMP_40_ENABLED
3548  if( thr->th.th_team_microtask ) {
3549  // AC: we are in teams region where multiple nested teams have same level
3550  int tlevel = thr->th.th_teams_level; // the level of the teams construct
3551  if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams)
3552  KMP_DEBUG_ASSERT( ii >= tlevel );
3553  // AC: As we need to pass by the teams league, we need to artificially increase ii
3554  if ( ii == tlevel ) {
3555  ii += 2; // three teams have same level
3556  } else {
3557  ii ++; // two teams have same level
3558  }
3559  }
3560  }
3561 #endif
3562 
3563  while( ii > level )
3564  {
3565  for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3566  {
3567  }
3568  if( team -> t.t_serialized && ( !dd ) ) {
3569  team = team->t.t_parent;
3570  continue;
3571  }
3572  if( ii > level ) {
3573  team = team->t.t_parent;
3574  ii--;
3575  }
3576  }
3577 
3578  return team -> t.t_nproc;
3579 }
3580 
3581 #endif // OMP_30_ENABLED
3582 
3583 kmp_r_sched_t
3584 __kmp_get_schedule_global() {
3585 // This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
3586 // may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
3587 
3588  kmp_r_sched_t r_sched;
3589 
3590  // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
3591  // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
3592  // and thus have different run-time schedules in different roots (even in OMP 2.5)
3593  if ( __kmp_sched == kmp_sch_static ) {
3594  r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
3595  } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
3596  r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
3597  } else {
3598  r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
3599  }
3600 
3601  if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
3602  r_sched.chunk = KMP_DEFAULT_CHUNK;
3603  } else {
3604  r_sched.chunk = __kmp_chunk;
3605  }
3606 
3607  return r_sched;
3608 }
3609 
3610 /* ------------------------------------------------------------------------ */
3611 /* ------------------------------------------------------------------------ */
3612 
3613 
3614 /*
3615  * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
3616  * at least argc number of *t_argv entries for the requested team.
3617  */
3618 static void
3619 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
3620 {
3621 
3622  KMP_DEBUG_ASSERT( team );
3623  if( !realloc || argc > team -> t.t_max_argc ) {
3624 
3625  KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
3626  team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
3627 #if (KMP_PERF_V106 == KMP_ON)
3628  /* if previously allocated heap space for args, free them */
3629  if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
3630  __kmp_free( (void *) team -> t.t_argv );
3631 
3632  if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
3633  /* use unused space in the cache line for arguments */
3634  team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3635  KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
3636  team->t.t_id, team->t.t_max_argc ));
3637  team -> t.t_argv = &team -> t.t_inline_argv[0];
3638  if ( __kmp_storage_map ) {
3639  __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
3640  &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3641  (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
3642  "team_%d.t_inline_argv",
3643  team->t.t_id );
3644  }
3645  } else {
3646  /* allocate space for arguments in the heap */
3647  team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3648  KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3649  KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3650  team->t.t_id, team->t.t_max_argc ));
3651  team -> t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
3652  if ( __kmp_storage_map ) {
3653  __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3654  sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
3655  team->t.t_id );
3656  }
3657  }
3658 #else /* KMP_PERF_V106 == KMP_OFF */
3659  if ( realloc )
3660  __kmp_free( (void*) team -> t.t_argv );
3661  team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3662  KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3663  KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3664  team->t.t_id, team->t.t_max_argc ));
3665  team -> t.t_argv = __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
3666  if ( __kmp_storage_map ) {
3667  __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3668  sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", team->t.t_id );
3669  }
3670 #endif /* KMP_PERF_V106 */
3671 
3672  }
3673 }
3674 
3675 static void
3676 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
3677 {
3678  int i;
3679  int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3680 #if KMP_USE_POOLED_ALLOC
3681  // AC: TODO: fix bug here: size of t_disp_buffer should not be multiplied by max_nth!
3682  char *ptr = __kmp_allocate(max_nth *
3683  ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buf
3684  + sizeof(kmp_disp_t) + sizeof(int)*6
3685 # if OMP_30_ENABLED
3686  //+ sizeof(int)
3687  + sizeof(kmp_r_sched_t)
3688  + sizeof(kmp_taskdata_t)
3689 # endif // OMP_30_ENABLED
3690  ) );
3691 
3692  team -> t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
3693  team -> t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3694  ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
3695  team -> t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
3696  team -> t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
3697  team -> t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
3698  team -> t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
3699  team -> t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
3700  team -> t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
3701  team -> t.t_set_bt_set = (int*) ptr;
3702 # if OMP_30_ENABLED
3703  ptr += sizeof(int) * max_nth;
3704  //team -> t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth;
3705  team -> t.t_set_sched = (kmp_r_sched_t*) ptr;
3706  ptr += sizeof(kmp_r_sched_t) * max_nth;
3707  team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3708  ptr += sizeof(kmp_taskdata_t) * max_nth;
3709 # endif // OMP_30_ENABLED
3710 #else
3711 
3712  team -> t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
3713  team -> t.t_disp_buffer = (dispatch_shared_info_t*)
3714  __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
3715  team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
3716  #if OMP_30_ENABLED
3717  //team -> t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth );
3718  //team -> t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth );
3719  team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
3720  #else
3721  team -> t.t_set_nproc = (int*) __kmp_allocate( sizeof(int) * max_nth );
3722  team -> t.t_set_dynamic = (int*) __kmp_allocate( sizeof(int) * max_nth );
3723  team -> t.t_set_nested = (int*) __kmp_allocate( sizeof(int) * max_nth );
3724  team -> t.t_set_blocktime = (int*) __kmp_allocate( sizeof(int) * max_nth );
3725  team -> t.t_set_bt_intervals = (int*) __kmp_allocate( sizeof(int) * max_nth );
3726  team -> t.t_set_bt_set = (int*) __kmp_allocate( sizeof(int) * max_nth );
3727 # endif // OMP_30_ENABLED
3728 #endif
3729  team->t.t_max_nproc = max_nth;
3730 
3731  /* setup dispatch buffers */
3732  for(i = 0 ; i < num_disp_buff; ++i)
3733  team -> t.t_disp_buffer[i].buffer_index = i;
3734 }
3735 
3736 static void
3737 __kmp_free_team_arrays(kmp_team_t *team) {
3738  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
3739  int i;
3740  for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3741  if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3742  __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3743  team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3744  }; // if
3745  }; // for
3746  __kmp_free(team->t.t_threads);
3747  #if !KMP_USE_POOLED_ALLOC
3748  __kmp_free(team->t.t_disp_buffer);
3749  __kmp_free(team->t.t_dispatch);
3750  #if OMP_30_ENABLED
3751  //__kmp_free(team->t.t_set_max_active_levels);
3752  //__kmp_free(team->t.t_set_sched);
3753  __kmp_free(team->t.t_implicit_task_taskdata);
3754  #else
3755  __kmp_free(team->t.t_set_nproc);
3756  __kmp_free(team->t.t_set_dynamic);
3757  __kmp_free(team->t.t_set_nested);
3758  __kmp_free(team->t.t_set_blocktime);
3759  __kmp_free(team->t.t_set_bt_intervals);
3760  __kmp_free(team->t.t_set_bt_set);
3761  # endif // OMP_30_ENABLED
3762  #endif
3763  team->t.t_threads = NULL;
3764  team->t.t_disp_buffer = NULL;
3765  team->t.t_dispatch = NULL;
3766 #if OMP_30_ENABLED
3767  //team->t.t_set_sched = 0;
3768  //team->t.t_set_max_active_levels = 0;
3769  team->t.t_implicit_task_taskdata = 0;
3770 #else
3771  team->t.t_set_nproc = 0;
3772  team->t.t_set_dynamic = 0;
3773  team->t.t_set_nested = 0;
3774  team->t.t_set_blocktime = 0;
3775  team->t.t_set_bt_intervals = 0;
3776  team->t.t_set_bt_set = 0;
3777 #endif // OMP_30_ENABLED
3778 }
3779 
3780 static void
3781 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3782  kmp_info_t **oldThreads = team->t.t_threads;
3783 
3784  #if !KMP_USE_POOLED_ALLOC
3785  __kmp_free(team->t.t_disp_buffer);
3786  __kmp_free(team->t.t_dispatch);
3787  #if OMP_30_ENABLED
3788  //__kmp_free(team->t.t_set_max_active_levels);
3789  //__kmp_free(team->t.t_set_sched);
3790  __kmp_free(team->t.t_implicit_task_taskdata);
3791  #else
3792  __kmp_free(team->t.t_set_nproc);
3793  __kmp_free(team->t.t_set_dynamic);
3794  __kmp_free(team->t.t_set_nested);
3795  __kmp_free(team->t.t_set_blocktime);
3796  __kmp_free(team->t.t_set_bt_intervals);
3797  __kmp_free(team->t.t_set_bt_set);
3798  # endif // OMP_30_ENABLED
3799  #endif
3800  __kmp_allocate_team_arrays(team, max_nth);
3801 
3802  memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3803 
3804  __kmp_free(oldThreads);
3805 }
3806 
3807 static kmp_internal_control_t
3808 __kmp_get_global_icvs( void ) {
3809 
3810 #if OMP_30_ENABLED
3811  kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3812 #endif /* OMP_30_ENABLED */
3813 
3814 #if OMP_40_ENABLED
3815  KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3816 #endif /* OMP_40_ENABLED */
3817 
3818  kmp_internal_control_t g_icvs = {
3819  0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
3820  __kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread)
3821  __kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread)
3822  __kmp_dflt_team_nth,
3823  //int nproc; //internal control for # of threads for next parallel region (per thread)
3824  // (use a max ub on value if __kmp_parallel_initialize not called yet)
3825  __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime
3826  __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals
3827  __kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set
3828 #if OMP_30_ENABLED
3829  __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels
3830  r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair
3831 #endif /* OMP_30_ENABLED */
3832 #if OMP_40_ENABLED
3833  __kmp_nested_proc_bind.bind_types[0],
3834 #endif /* OMP_40_ENABLED */
3835  NULL //struct kmp_internal_control *next;
3836  };
3837 
3838  return g_icvs;
3839 }
3840 
3841 static kmp_internal_control_t
3842 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
3843 
3844  #if OMP_30_ENABLED
3845  kmp_internal_control_t gx_icvs;
3846  gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
3847  copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3848  gx_icvs.next = NULL;
3849  #else
3850  kmp_internal_control_t gx_icvs =
3851  {
3852  0,
3853  team->t.t_set_nested[0],
3854  team->t.t_set_dynamic[0],
3855  team->t.t_set_nproc[0],
3856  team->t.t_set_blocktime[0],
3857  team->t.t_set_bt_intervals[0],
3858  team->t.t_set_bt_set[0],
3859  NULL //struct kmp_internal_control *next;
3860  };
3861  #endif // OMP_30_ENABLED
3862 
3863  return gx_icvs;
3864 }
3865 
3866 static void
3867 __kmp_initialize_root( kmp_root_t *root )
3868 {
3869  int f;
3870  kmp_team_t *root_team;
3871  kmp_team_t *hot_team;
3872  size_t disp_size, dispatch_size, bar_size;
3873  int hot_team_max_nth;
3874 #if OMP_30_ENABLED
3875  kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
3876  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3877 #endif // OMP_30_ENABLED
3878  KMP_DEBUG_ASSERT( root );
3879  KMP_ASSERT( ! root->r.r_begin );
3880 
3881  /* setup the root state structure */
3882  __kmp_init_lock( &root->r.r_begin_lock );
3883  root -> r.r_begin = FALSE;
3884  root -> r.r_active = FALSE;
3885  root -> r.r_in_parallel = 0;
3886  root -> r.r_blocktime = __kmp_dflt_blocktime;
3887  root -> r.r_nested = __kmp_dflt_nested;
3888 
3889  /* setup the root team for this task */
3890  /* allocate the root team structure */
3891  KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
3892  root_team =
3893  __kmp_allocate_team(
3894  root,
3895  1, // new_nproc
3896  1, // max_nproc
3897 #if OMP_40_ENABLED
3898  __kmp_nested_proc_bind.bind_types[0],
3899 #endif
3900 #if OMP_30_ENABLED
3901  &r_icvs,
3902 #else
3903  __kmp_dflt_team_nth_ub, // num_treads
3904  __kmp_global.g.g_dynamic, // dynamic
3905  __kmp_dflt_nested, // nested
3906  __kmp_dflt_blocktime, // blocktime
3907  __kmp_bt_intervals, // bt_intervals
3908  __kmp_env_blocktime, // bt_set
3909 #endif // OMP_30_ENABLED
3910  0 // argc
3911  );
3912 
3913  KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3914 
3915  root -> r.r_root_team = root_team;
3916  root_team -> t.t_control_stack_top = NULL;
3917 
3918  /* initialize root team */
3919  root_team -> t.t_threads[0] = NULL;
3920  root_team -> t.t_nproc = 1;
3921  root_team -> t.t_serialized = 1;
3922 #if OMP_30_ENABLED
3923  // TODO???: root_team -> t.t_max_active_levels = __kmp_dflt_max_active_levels;
3924  root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3925  root_team -> t.t_sched.chunk = r_sched.chunk;
3926 #endif // OMP_30_ENABLED
3927  KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3928  root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3929 
3930  /* setup the hot team for this task */
3931  /* allocate the hot team structure */
3932  KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
3933  hot_team =
3934  __kmp_allocate_team(
3935  root,
3936  1, // new_nproc
3937  __kmp_dflt_team_nth_ub * 2, // max_nproc
3938 #if OMP_40_ENABLED
3939  __kmp_nested_proc_bind.bind_types[0],
3940 #endif
3941 #if OMP_30_ENABLED
3942  &r_icvs,
3943 #else
3944  __kmp_dflt_team_nth_ub, // num_treads
3945  __kmp_global.g.g_dynamic, // dynamic
3946  __kmp_dflt_nested, // nested
3947  __kmp_dflt_blocktime, // blocktime
3948  __kmp_bt_intervals, // bt_intervals
3949  __kmp_env_blocktime, // bt_set
3950 #endif // OMP_30_ENABLED
3951  0 // argc
3952  );
3953  KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3954 
3955  root -> r.r_hot_team = hot_team;
3956  root_team -> t.t_control_stack_top = NULL;
3957 
3958  /* first-time initialization */
3959  hot_team -> t.t_parent = root_team;
3960 
3961  /* initialize hot team */
3962  hot_team_max_nth = hot_team->t.t_max_nproc;
3963  for ( f = 0; f < hot_team_max_nth; ++ f ) {
3964  hot_team -> t.t_threads[ f ] = NULL;
3965  }; // for
3966  hot_team -> t.t_nproc = 1;
3967 #if OMP_30_ENABLED
3968  // TODO???: hot_team -> t.t_max_active_levels = __kmp_dflt_max_active_levels;
3969  hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3970  hot_team -> t.t_sched.chunk = r_sched.chunk;
3971 #endif // OMP_30_ENABLED
3972 #if KMP_MIC
3973  hot_team -> t.t_size_changed = 0;
3974 #endif
3975 
3976 }
3977 
3978 #ifdef KMP_DEBUG
3979 
3980 
3981 typedef struct kmp_team_list_item {
3982  kmp_team_p const * entry;
3983  struct kmp_team_list_item * next;
3984 } kmp_team_list_item_t;
3985 typedef kmp_team_list_item_t * kmp_team_list_t;
3986 
3987 
3988 static void
3989 __kmp_print_structure_team_accum( // Add team to list of teams.
3990  kmp_team_list_t list, // List of teams.
3991  kmp_team_p const * team // Team to add.
3992 ) {
3993 
3994  // List must terminate with item where both entry and next are NULL.
3995  // Team is added to the list only once.
3996  // List is sorted in ascending order by team id.
3997  // Team id is *not* a key.
3998 
3999  kmp_team_list_t l;
4000 
4001  KMP_DEBUG_ASSERT( list != NULL );
4002  if ( team == NULL ) {
4003  return;
4004  }; // if
4005 
4006  __kmp_print_structure_team_accum( list, team->t.t_parent );
4007  __kmp_print_structure_team_accum( list, team->t.t_next_pool );
4008 
4009  // Search list for the team.
4010  l = list;
4011  while ( l->next != NULL && l->entry != team ) {
4012  l = l->next;
4013  }; // while
4014  if ( l->next != NULL ) {
4015  return; // Team has been added before, exit.
4016  }; // if
4017 
4018  // Team is not found. Search list again for insertion point.
4019  l = list;
4020  while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
4021  l = l->next;
4022  }; // while
4023 
4024  // Insert team.
4025  {
4026  kmp_team_list_item_t * item =
4027  (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
4028  * item = * l;
4029  l->entry = team;
4030  l->next = item;
4031  }
4032 
4033 }
4034 
4035 static void
4036 __kmp_print_structure_team(
4037  char const * title,
4038  kmp_team_p const * team
4039 
4040 ) {
4041  __kmp_printf( "%s", title );
4042  if ( team != NULL ) {
4043  __kmp_printf( "%2x %p\n", team->t.t_id, team );
4044  } else {
4045  __kmp_printf( " - (nil)\n" );
4046  }; // if
4047 }
4048 
4049 static void
4050 __kmp_print_structure_thread(
4051  char const * title,
4052  kmp_info_p const * thread
4053 
4054 ) {
4055  __kmp_printf( "%s", title );
4056  if ( thread != NULL ) {
4057  __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
4058  } else {
4059  __kmp_printf( " - (nil)\n" );
4060  }; // if
4061 }
4062 
4063 static void
4064 __kmp_print_structure(
4065  void
4066 ) {
4067 
4068  kmp_team_list_t list;
4069 
4070  // Initialize list of teams.
4071  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
4072  list->entry = NULL;
4073  list->next = NULL;
4074 
4075  __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
4076  {
4077  int gtid;
4078  for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4079  __kmp_printf( "%2d", gtid );
4080  if ( __kmp_threads != NULL ) {
4081  __kmp_printf( " %p", __kmp_threads[ gtid ] );
4082  }; // if
4083  if ( __kmp_root != NULL ) {
4084  __kmp_printf( " %p", __kmp_root[ gtid ] );
4085  }; // if
4086  __kmp_printf( "\n" );
4087  }; // for gtid
4088  }
4089 
4090  // Print out __kmp_threads array.
4091  __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
4092  if ( __kmp_threads != NULL ) {
4093  int gtid;
4094  for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4095  kmp_info_t const * thread = __kmp_threads[ gtid ];
4096  if ( thread != NULL ) {
4097  __kmp_printf( "GTID %2d %p:\n", gtid, thread );
4098  __kmp_printf( " Our Root: %p\n", thread->th.th_root );
4099  __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
4100  __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
4101  __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
4102  __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
4103  __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
4104  __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
4105 #if OMP_40_ENABLED
4106  __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
4107 #endif
4108  __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
4109  __kmp_printf( "\n" );
4110  __kmp_print_structure_team_accum( list, thread->th.th_team );
4111  __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
4112  }; // if
4113  }; // for gtid
4114  } else {
4115  __kmp_printf( "Threads array is not allocated.\n" );
4116  }; // if
4117 
4118  // Print out __kmp_root array.
4119  __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
4120  if ( __kmp_root != NULL ) {
4121  int gtid;
4122  for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4123  kmp_root_t const * root = __kmp_root[ gtid ];
4124  if ( root != NULL ) {
4125  __kmp_printf( "GTID %2d %p:\n", gtid, root );
4126  __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
4127  __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
4128  __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
4129  __kmp_printf( " Active?: %2d\n", root->r.r_active );
4130  __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
4131  __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
4132  __kmp_printf( "\n" );
4133  __kmp_print_structure_team_accum( list, root->r.r_root_team );
4134  __kmp_print_structure_team_accum( list, root->r.r_hot_team );
4135  }; // if
4136  }; // for gtid
4137  } else {
4138  __kmp_printf( "Ubers array is not allocated.\n" );
4139  }; // if
4140 
4141  __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
4142  while ( list->next != NULL ) {
4143  kmp_team_p const * team = list->entry;
4144  int i;
4145  __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
4146  __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
4147  __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
4148  __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
4149  __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
4150  __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
4151  for ( i = 0; i < team->t.t_nproc; ++ i ) {
4152  __kmp_printf( " Thread %2d: ", i );
4153  __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
4154  }; // for i
4155  __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
4156  __kmp_printf( "\n" );
4157  list = list->next;
4158  }; // while
4159 
4160  // Print out __kmp_thread_pool and __kmp_team_pool.
4161  __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
4162  __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
4163  __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
4164  __kmp_printf( "\n" );
4165 
4166  // Free team list.
4167  while ( list != NULL ) {
4168  kmp_team_list_item_t * item = list;
4169  list = list->next;
4170  KMP_INTERNAL_FREE( item );
4171  }; // while
4172 
4173 }
4174 
4175 #endif
4176 
4177 
4178 //---------------------------------------------------------------------------
4179 // Stuff for per-thread fast random number generator
4180 // Table of primes
4181 
4182 static const unsigned __kmp_primes[] = {
4183  0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
4184  0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
4185  0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
4186  0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
4187  0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
4188  0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
4189  0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
4190  0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
4191  0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
4192  0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
4193  0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
4194  0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
4195  0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
4196  0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
4197  0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
4198  0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
4199 };
4200 
4201 //---------------------------------------------------------------------------
4202 // __kmp_get_random: Get a random number using a linear congruential method.
4203 
4204 unsigned short
4205 __kmp_get_random( kmp_info_t * thread )
4206 {
4207  unsigned x = thread -> th.th_x;
4208  unsigned short r = x>>16;
4209 
4210  thread -> th.th_x = x*thread->th.th_a+1;
4211 
4212  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
4213  thread->th.th_info.ds.ds_tid, r) );
4214 
4215  return r;
4216 }
4217 //--------------------------------------------------------
4218 // __kmp_init_random: Initialize a random number generator
4219 
4220 void
4221 __kmp_init_random( kmp_info_t * thread )
4222 {
4223  unsigned seed = thread->th.th_info.ds.ds_tid;
4224 
4225  thread -> th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
4226  thread -> th.th_x = (seed+1)*thread->th.th_a+1;
4227  KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a) );
4228 }
4229 
4230 
4231 #if KMP_OS_WINDOWS
4232 /* reclaim array entries for root threads that are already dead, returns number reclaimed */
4233 static int
4234 __kmp_reclaim_dead_roots(void) {
4235  int i, r = 0;
4236 
4237  for(i = 0; i < __kmp_threads_capacity; ++i) {
4238  if( KMP_UBER_GTID( i ) &&
4239  !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
4240  !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
4241  r += __kmp_unregister_root_other_thread(i);
4242  }
4243  }
4244  return r;
4245 }
4246 #endif
4247 
4248 /*
4249  This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
4250  free entries generated.
4251 
4252  For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
4253  already dead.
4254 
4255  On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
4256  update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to
4257  __kmp_tp_capacity, if threadprivate cache array has been created.
4258  Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
4259 
4260  After any dead root reclamation, if the clipping value allows array expansion to result in the generation
4261  of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows
4262  array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
4263  Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero,
4264  a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
4265  as many free slots as possible up to nWish.
4266 
4267  If any argument is negative, the behavior is undefined.
4268 */
4269 static int
4270 __kmp_expand_threads(int nWish, int nNeed) {
4271  int added = 0;
4272  int old_tp_cached;
4273  int __kmp_actual_max_nth;
4274 
4275  if(nNeed > nWish) /* normalize the arguments */
4276  nWish = nNeed;
4277 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
4278 /* only for Windows static library */
4279  /* reclaim array entries for root threads that are already dead */
4280  added = __kmp_reclaim_dead_roots();
4281 
4282  if(nNeed) {
4283  nNeed -= added;
4284  if(nNeed < 0)
4285  nNeed = 0;
4286  }
4287  if(nWish) {
4288  nWish -= added;
4289  if(nWish < 0)
4290  nWish = 0;
4291  }
4292 #endif
4293  if(nWish <= 0)
4294  return added;
4295 
4296  while(1) {
4297  int nTarget;
4298  int minimumRequiredCapacity;
4299  int newCapacity;
4300  kmp_info_t **newThreads;
4301  kmp_root_t **newRoot;
4302 
4303  //
4304  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
4305  // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
4306  // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
4307  // become > __kmp_max_nth in one of two ways:
4308  //
4309  // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
4310  // may not be resused by another thread, so we may need to increase
4311  // __kmp_threads_capacity to __kmp_max_threads + 1.
4312  //
4313  // 2) New foreign root(s) are encountered. We always register new
4314  // foreign roots. This may cause a smaller # of threads to be
4315  // allocated at subsequent parallel regions, but the worker threads
4316  // hang around (and eventually go to sleep) and need slots in the
4317  // __kmp_threads[] array.
4318  //
4319  // Anyway, that is the reason for moving the check to see if
4320  // __kmp_max_threads was exceeded into __kmp_reseerve_threads()
4321  // instead of having it performed here. -BB
4322  //
4323  old_tp_cached = __kmp_tp_cached;
4324  __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
4325  KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
4326 
4327  /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
4328  nTarget = nWish;
4329  if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4330  /* can't fulfil nWish, so try nNeed */
4331  if(nNeed) {
4332  nTarget = nNeed;
4333  if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4334  /* possible expansion too small -- give up */
4335  break;
4336  }
4337  } else {
4338  /* best-effort */
4339  nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
4340  if(!nTarget) {
4341  /* can expand at all -- give up */
4342  break;
4343  }
4344  }
4345  }
4346  minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
4347 
4348  newCapacity = __kmp_threads_capacity;
4349  do{
4350  newCapacity =
4351  newCapacity <= (__kmp_actual_max_nth >> 1) ?
4352  (newCapacity << 1) :
4353  __kmp_actual_max_nth;
4354  } while(newCapacity < minimumRequiredCapacity);
4355  newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
4356  newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
4357  memcpy(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
4358  memcpy(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
4359  memset(newThreads + __kmp_threads_capacity, 0,
4360  (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
4361  memset(newRoot + __kmp_threads_capacity, 0,
4362  (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
4363 
4364  if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4365  /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
4366  while we were allocating the expanded array, and our new capacity is larger than the threadprivate
4367  cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
4368  of a double-check pair.
4369  */
4370  __kmp_free(newThreads);
4371  continue; /* start over and try again */
4372  }
4373  __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
4374  if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4375  /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
4376  __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
4377  __kmp_free(newThreads);
4378  continue; /* start over and try again */
4379  } else {
4380  /* success */
4381  // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
4382  //
4383  *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
4384  *(kmp_root_t**volatile*)&__kmp_root = newRoot;
4385  added += newCapacity - __kmp_threads_capacity;
4386  *(volatile int*)&__kmp_threads_capacity = newCapacity;
4387  __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
4388  break; /* succeded, so we can exit the loop */
4389  }
4390  }
4391  return added;
4392 }
4393 
4394 /* register the current thread as a root thread and obtain our gtid */
4395 /* we must have the __kmp_initz_lock held at this point */
4396 /* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
4397 int
4398 __kmp_register_root( int initial_thread )
4399 {
4400  kmp_info_t *root_thread;
4401  kmp_root_t *root;
4402  int gtid;
4403  int capacity;
4404  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4405  KA_TRACE( 20, ("__kmp_register_root: entered\n"));
4406  KMP_MB();
4407 
4408 
4409  /*
4410  2007-03-02:
4411 
4412  If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one,
4413  "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
4414  return false (that means there is at least one empty slot in __kmp_threads array), but it
4415  is possible the only free slot is #0, which is reserved for initial thread and so cannot be
4416  used for this one. Following code workarounds this bug.
4417 
4418  However, right solution seems to be not reserving slot #0 for initial thread because:
4419  (1) there is no magic in slot #0,
4420  (2) we cannot detect initial thread reliably (the first thread which does serial
4421  initialization may be not a real initial thread).
4422  */
4423  capacity = __kmp_threads_capacity;
4424  if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
4425  -- capacity;
4426  }; // if
4427 
4428  /* see if there are too many threads */
4429  if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
4430  if ( __kmp_tp_cached ) {
4431  __kmp_msg(
4432  kmp_ms_fatal,
4433  KMP_MSG( CantRegisterNewThread ),
4434  KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
4435  KMP_HNT( PossibleSystemLimitOnThreads ),
4436  __kmp_msg_null
4437  );
4438  }
4439  else {
4440  __kmp_msg(
4441  kmp_ms_fatal,
4442  KMP_MSG( CantRegisterNewThread ),
4443  KMP_HNT( SystemLimitOnThreads ),
4444  __kmp_msg_null
4445  );
4446  }
4447  }; // if
4448 
4449  /* find an available thread slot */
4450  /* Don't reassign the zero slot since we need that to only be used by initial
4451  thread */
4452  for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
4453  KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
4454  KMP_ASSERT( gtid < __kmp_threads_capacity );
4455 
4456  /* update global accounting */
4457  __kmp_all_nth ++;
4458  TCW_4(__kmp_nth, __kmp_nth + 1);
4459 
4460  //
4461  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
4462  // for low numbers of procs, and method #2 (keyed API call) for higher
4463  // numbers of procs.
4464  //
4465  if ( __kmp_adjust_gtid_mode ) {
4466  if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4467  if ( TCR_4(__kmp_gtid_mode) != 2) {
4468  TCW_4(__kmp_gtid_mode, 2);
4469  }
4470  }
4471  else {
4472  if (TCR_4(__kmp_gtid_mode) != 1 ) {
4473  TCW_4(__kmp_gtid_mode, 1);
4474  }
4475  }
4476  }
4477 
4478 #ifdef KMP_ADJUST_BLOCKTIME
4479  /* Adjust blocktime to zero if necessary */
4480  /* Middle initialization might not have ocurred yet */
4481  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4482  if ( __kmp_nth > __kmp_avail_proc ) {
4483  __kmp_zero_bt = TRUE;
4484  }
4485  }
4486 #endif /* KMP_ADJUST_BLOCKTIME */
4487 
4488  /* setup this new hierarchy */
4489  if( ! ( root = __kmp_root[gtid] )) {
4490  root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
4491  KMP_DEBUG_ASSERT( ! root->r.r_root_team );
4492  }
4493 
4494  __kmp_initialize_root( root );
4495 
4496  /* setup new root thread structure */
4497  if( root -> r.r_uber_thread ) {
4498  root_thread = root -> r.r_uber_thread;
4499  } else {
4500  root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4501  if ( __kmp_storage_map ) {
4502  __kmp_print_thread_storage_map( root_thread, gtid );
4503  }
4504  root_thread -> th.th_info .ds.ds_gtid = gtid;
4505  root_thread -> th.th_root = root;
4506  if( __kmp_env_consistency_check ) {
4507  root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
4508  }
4509  #if USE_FAST_MEMORY
4510  __kmp_initialize_fast_memory( root_thread );
4511  #endif /* USE_FAST_MEMORY */
4512 
4513  #if KMP_USE_BGET
4514  KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
4515  __kmp_initialize_bget( root_thread );
4516  #endif
4517  __kmp_init_random( root_thread ); // Initialize random number generator
4518  }
4519 
4520  /* setup the serial team held in reserve by the root thread */
4521  if( ! root_thread -> th.th_serial_team ) {
4522  #if OMP_30_ENABLED
4523  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
4524  #endif // OMP_30_ENABLED
4525  KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
4526  root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
4527 #if OMP_40_ENABLED
4528  proc_bind_default,
4529 #endif
4530 #if OMP_30_ENABLED
4531  &r_icvs,
4532 #else
4533  __kmp_dflt_team_nth_ub,
4534  __kmp_global.g.g_dynamic,
4535  __kmp_dflt_nested,
4536  __kmp_dflt_blocktime,
4537  __kmp_bt_intervals,
4538  __kmp_env_blocktime,
4539 #endif // OMP_30_ENABLED
4540  0 );
4541  }
4542  KMP_ASSERT( root_thread -> th.th_serial_team );
4543  KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
4544  root_thread -> th.th_serial_team ) );
4545 
4546  /* drop root_thread into place */
4547  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
4548 
4549  root -> r.r_root_team -> t.t_threads[0] = root_thread;
4550  root -> r.r_hot_team -> t.t_threads[0] = root_thread;
4551  root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
4552  root_thread -> th.th_serial_team -> t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
4553  root -> r.r_uber_thread = root_thread;
4554 
4555  /* initialize the thread, get it ready to go */
4556  __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
4557 
4558  /* prepare the master thread for get_gtid() */
4559  __kmp_gtid_set_specific( gtid );
4560  #ifdef KMP_TDATA_GTID
4561  __kmp_gtid = gtid;
4562  #endif
4563  __kmp_create_worker( gtid, root_thread, __kmp_stksize );
4564  KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
4565  TCW_4(__kmp_init_gtid, TRUE);
4566 
4567  KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
4568  gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
4569  root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
4570  KMP_INIT_BARRIER_STATE ) );
4571  { // Initialize barrier data.
4572  int b;
4573  for ( b = 0; b < bs_last_barrier; ++ b ) {
4574  root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4575  }; // for
4576  }
4577  KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
4578 
4579 
4580 #if KMP_OS_WINDOWS || KMP_OS_LINUX
4581  if ( TCR_4(__kmp_init_middle) ) {
4582  __kmp_affinity_set_init_mask( gtid, TRUE );
4583  }
4584 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
4585 
4586  __kmp_root_counter ++;
4587 
4588  KMP_MB();
4589  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
4590 
4591  return gtid;
4592 }
4593 
4594 /* Resets a root thread and clear its root and hot teams.
4595  Returns the number of __kmp_threads entries directly and indirectly freed.
4596 */
4597 static int
4598 __kmp_reset_root(int gtid, kmp_root_t *root)
4599 {
4600  kmp_team_t * root_team = root->r.r_root_team;
4601  kmp_team_t * hot_team = root->r.r_hot_team;
4602  int n = hot_team->t.t_nproc;
4603  int i;
4604 
4605  KMP_DEBUG_ASSERT( ! root->r.r_active );
4606 
4607  root->r.r_root_team = NULL;
4608  root->r.r_hot_team = NULL;
4609  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
4610  // to __kmp_free_team().
4611  __kmp_free_team( root, root_team );
4612  __kmp_free_team( root, hot_team );
4613 
4614 #if OMP_30_ENABLED
4615  //
4616  // Before we can reap the thread, we need to make certain that all
4617  // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
4618  //
4619  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4620  __kmp_wait_to_unref_task_teams();
4621  }
4622 #endif /* OMP_30_ENABLED */
4623 
4624  #if KMP_OS_WINDOWS
4625  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
4626  KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
4627  (LPVOID)&(root->r.r_uber_thread->th),
4628  root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
4629  __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
4630  #endif /* KMP_OS_WINDOWS */
4631 
4632  TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
4633  __kmp_reap_thread( root->r.r_uber_thread, 1 );
4634 
4635  // We canot put root thread to __kmp_thread_pool, so we have to reap it istead of freeing.
4636  root->r.r_uber_thread = NULL;
4637  /* mark root as no longer in use */
4638  root -> r.r_begin = FALSE;
4639 
4640  return n;
4641 }
4642 
4643 void
4644 __kmp_unregister_root_current_thread( int gtid )
4645 {
4646  kmp_root_t *root = __kmp_root[gtid];
4647 
4648  KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
4649  KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4650  KMP_ASSERT( KMP_UBER_GTID( gtid ));
4651  KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4652  KMP_ASSERT( root->r.r_active == FALSE );
4653 
4654  /* this lock should be ok, since unregister_root_current_thread is never called during
4655  * and abort, only during a normal close. furthermore, if you have the
4656  * forkjoin lock, you should never try to get the initz lock */
4657 
4658  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4659 
4660  KMP_MB();
4661 
4662  __kmp_reset_root(gtid, root);
4663 
4664  /* free up this thread slot */
4665  __kmp_gtid_set_specific( KMP_GTID_DNE );
4666 #ifdef KMP_TDATA_GTID
4667  __kmp_gtid = KMP_GTID_DNE;
4668 #endif
4669 
4670  KMP_MB();
4671  KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
4672 
4673  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
4674 }
4675 
4676 /* __kmp_forkjoin_lock must be already held
4677  Unregisters a root thread that is not the current thread. Returns the number of
4678  __kmp_threads entries freed as a result.
4679  */
4680 static int
4681 __kmp_unregister_root_other_thread( int gtid )
4682 {
4683  kmp_root_t *root = __kmp_root[gtid];
4684  int r;
4685 
4686  KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
4687  KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4688  KMP_ASSERT( KMP_UBER_GTID( gtid ));
4689  KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4690  KMP_ASSERT( root->r.r_active == FALSE );
4691 
4692  r = __kmp_reset_root(gtid, root);
4693  KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
4694  return r;
4695 }
4696 
4697 #if OMP_30_ENABLED
4698 
4699 #if KMP_DEBUG
4700 void __kmp_task_info() {
4701 
4702  kmp_int32 gtid = __kmp_entry_gtid();
4703  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
4704  kmp_info_t *this_thr = __kmp_threads[ gtid ];
4705  kmp_team_t *steam = this_thr -> th.th_serial_team;
4706  kmp_team_t *team = this_thr -> th.th_team;
4707 
4708  __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
4709  gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
4710 }
4711 #endif // KMP_DEBUG
4712 
4713 #endif // OMP_30_ENABLED
4714 
4715 /* TODO optimize with one big memclr, take out what isn't needed,
4716  * split responsility to workers as much as possible, and delay
4717  * initialization of features as much as possible */
4718 static void
4719 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
4720 {
4721  /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
4722  * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
4723 
4724  KMP_DEBUG_ASSERT( this_thr != NULL );
4725  KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
4726  KMP_DEBUG_ASSERT( team );
4727  KMP_DEBUG_ASSERT( team -> t.t_threads );
4728  KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4729  KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
4730  KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
4731 
4732  KMP_MB();
4733 
4734  TCW_SYNC_PTR(this_thr->th.th_team, team);
4735 
4736  this_thr->th.th_info.ds.ds_tid = tid;
4737  this_thr->th.th_set_nproc = 0;
4738 #if OMP_40_ENABLED
4739  this_thr->th.th_set_proc_bind = proc_bind_default;
4740 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
4741  this_thr->th.th_new_place = this_thr->th.th_current_place;
4742 # endif
4743 #endif
4744  this_thr->th.th_root = team -> t.t_threads[0] -> th.th_root;
4745 
4746  /* setup the thread's cache of the team structure */
4747  this_thr->th.th_team_nproc = team -> t.t_nproc;
4748  this_thr->th.th_team_master = team -> t.t_threads[0];
4749  this_thr->th.th_team_serialized = team -> t.t_serialized;
4750 #if OMP_40_ENABLED
4751  this_thr->th.th_team_microtask = team -> t.t_threads[0] -> th.th_team_microtask;
4752  this_thr->th.th_teams_level = team -> t.t_threads[0] -> th.th_teams_level;
4753  this_thr->th.th_set_nth_teams = team -> t.t_threads[0] -> th.th_set_nth_teams;
4754 #endif /* OMP_40_ENABLED */
4755  TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4756 
4757 #if OMP_30_ENABLED
4758  KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
4759  this_thr->th.th_task_state = 0;
4760 
4761  KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4762  tid, gtid, this_thr, this_thr->th.th_current_task ) );
4763 
4764  __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4765 
4766  KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4767  tid, gtid, this_thr, this_thr->th.th_current_task ) );
4768  // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
4769 #endif // OMP_30_ENABLED
4770 
4771  /* TODO no worksharing in speculative threads */
4772  this_thr -> th.th_dispatch = &team -> t.t_dispatch[ tid ];
4773 
4774  this_thr->th.th_local.this_construct = 0;
4775  this_thr->th.th_local.last_construct = 0;
4776 
4777 #ifdef BUILD_TV
4778  this_thr->th.th_local.tv_data = 0;
4779 #endif
4780 
4781  if ( ! this_thr->th.th_pri_common ) {
4782  this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4783  if ( __kmp_storage_map ) {
4784  __kmp_print_storage_map_gtid(
4785  gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4786  sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4787  );
4788  }; // if
4789  this_thr->th.th_pri_head = NULL;
4790  }; // if
4791 
4792  /* Initialize dynamic dispatch */
4793  {
4794  volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
4795  /*
4796  * Use team max_nproc since this will never change for the team.
4797  */
4798  size_t disp_size = sizeof( dispatch_private_info_t ) *
4799  ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4800  KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4801  KMP_ASSERT( dispatch );
4802  KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4803  KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4804 
4805  dispatch->th_disp_index = 0;
4806 
4807  if( ! dispatch -> th_disp_buffer ) {
4808  dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4809 
4810  if ( __kmp_storage_map ) {
4811  __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4812  &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4813  disp_size, "th_%d.th_dispatch.th_disp_buffer "
4814  "(team_%d.t_dispatch[%d].th_disp_buffer)",
4815  gtid, team->t.t_id, gtid );
4816  }
4817  } else {
4818  memset( & dispatch -> th_disp_buffer[0], '\0', disp_size );
4819  }
4820 
4821  dispatch -> th_dispatch_pr_current = 0;
4822  dispatch -> th_dispatch_sh_current = 0;
4823 
4824  dispatch -> th_deo_fcn = 0; /* ORDERED */
4825  dispatch -> th_dxo_fcn = 0; /* END ORDERED */
4826  }
4827 
4828  this_thr->th.th_next_pool = NULL;
4829 
4830  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4831  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4832 
4833  KMP_MB();
4834 }
4835 
4836 
4837 /* allocate a new thread for the requesting team. this is only called from within a
4838  * forkjoin critical section. we will first try to get an available thread from the
4839  * thread pool. if none is available, we will fork a new one assuming we are able
4840  * to create a new one. this should be assured, as the caller should check on this
4841  * first.
4842  */
4843 kmp_info_t *
4844 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4845 {
4846  kmp_team_t *serial_team;
4847  kmp_info_t *new_thr;
4848  int new_gtid;
4849 
4850  KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4851  KMP_DEBUG_ASSERT( root && team );
4852  KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4853  KMP_MB();
4854 
4855  /* first, try to get one from the thread pool */
4856  if ( __kmp_thread_pool ) {
4857 
4858  new_thr = (kmp_info_t*)__kmp_thread_pool;
4859  __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4860  if ( new_thr == __kmp_thread_pool_insert_pt ) {
4861  __kmp_thread_pool_insert_pt = NULL;
4862  }
4863  TCW_4(new_thr->th.th_in_pool, FALSE);
4864  //
4865  // Don't touch th_active_in_pool or th_active.
4866  // The worker thread adjusts those flags as it sleeps/awakens.
4867  //
4868 
4869  __kmp_thread_pool_nth--;
4870 
4871  KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4872  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4873  KMP_ASSERT( ! new_thr -> th.th_team );
4874  KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4875  KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4876 
4877  /* setup the thread structure */
4878  __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4879  KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4880 
4881  TCW_4(__kmp_nth, __kmp_nth + 1);
4882 
4883 #ifdef KMP_ADJUST_BLOCKTIME
4884  /* Adjust blocktime back to zero if necessar y */
4885  /* Middle initialization might not have ocurred yet */
4886  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4887  if ( __kmp_nth > __kmp_avail_proc ) {
4888  __kmp_zero_bt = TRUE;
4889  }
4890  }
4891 #endif /* KMP_ADJUST_BLOCKTIME */
4892 
4893  KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4894  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4895 
4896  KMP_MB();
4897  return new_thr;
4898  }
4899 
4900 
4901  /* no, well fork a new one */
4902  KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4903  KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4904 
4905  //
4906  // If this is the first worker thread the RTL is creating, then also
4907  // launch the monitor thread. We try to do this as early as possible.
4908  //
4909  if ( ! TCR_4( __kmp_init_monitor ) ) {
4910  __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4911  if ( ! TCR_4( __kmp_init_monitor ) ) {
4912  KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4913  TCW_4( __kmp_init_monitor, 1 );
4914  __kmp_create_monitor( & __kmp_monitor );
4915  KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
4916  #if KMP_OS_WINDOWS
4917  // AC: wait until monitor has started. This is a fix for CQ232808.
4918  // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
4919  // work in between, then there is high probability that monitor thread started after
4920  // the library shutdown. At shutdown it is too late to cope with the problem, because
4921  // when the master is in DllMain (process detach) the monitor has no chances to start
4922  // (it is blocked), and master has no means to inform the monitor that the library has gone,
4923  // because all the memory which the monitor can access is going to be released/reset.
4924  while ( TCR_4(__kmp_init_monitor) < 2 ) {
4925  KMP_YIELD( TRUE );
4926  }
4927  KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4928  #endif
4929  }
4930  __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4931  }
4932 
4933  KMP_MB();
4934  for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4935  KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4936  }
4937 
4938  /* allocate space for it. */
4939  new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4940 
4941  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4942 
4943  if ( __kmp_storage_map ) {
4944  __kmp_print_thread_storage_map( new_thr, new_gtid );
4945  }
4946 
4947  /* add the reserve serialized team, initialized from the team's master thread */
4948  {
4949  #if OMP_30_ENABLED
4950  kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4951  #endif // OMP_30_ENABLED
4952  KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4953  new_thr -> th.th_serial_team = serial_team =
4954  (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4955 #if OMP_40_ENABLED
4956  proc_bind_default,
4957 #endif
4958 #if OMP_30_ENABLED
4959  &r_icvs,
4960 #else
4961  team->t.t_set_nproc[0],
4962  team->t.t_set_dynamic[0],
4963  team->t.t_set_nested[0],
4964  team->t.t_set_blocktime[0],
4965  team->t.t_set_bt_intervals[0],
4966  team->t.t_set_bt_set[0],
4967 #endif // OMP_30_ENABLED
4968  0 );
4969  }
4970  KMP_ASSERT ( serial_team );
4971  serial_team -> t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now).
4972  serial_team -> t.t_threads[0] = new_thr;
4973  KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4974  new_thr ) );
4975 
4976  /* setup the thread structures */
4977  __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4978 
4979  #if USE_FAST_MEMORY
4980  __kmp_initialize_fast_memory( new_thr );
4981  #endif /* USE_FAST_MEMORY */
4982 
4983  #if KMP_USE_BGET
4984  KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
4985  __kmp_initialize_bget( new_thr );
4986  #endif
4987 
4988  __kmp_init_random( new_thr ); // Initialize random number generator
4989 
4990  /* Initialize these only once when thread is grabbed for a team allocation */
4991  KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4992  __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4993 
4994  new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4995  new_thr->th.th_bar[ bs_plain_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4996  #if KMP_FAST_REDUCTION_BARRIER
4997  new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4998  #endif // KMP_FAST_REDUCTION_BARRIER
4999 
5000  new_thr->th.th_spin_here = FALSE;
5001  new_thr->th.th_next_waiting = 0;
5002 
5003 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
5004  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
5005  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
5006  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
5007  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
5008 #endif
5009 
5010  TCW_4(new_thr->th.th_in_pool, FALSE);
5011  new_thr->th.th_active_in_pool = FALSE;
5012  TCW_4(new_thr->th.th_active, TRUE);
5013 
5014  /* adjust the global counters */
5015  __kmp_all_nth ++;
5016  __kmp_nth ++;
5017 
5018  //
5019  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
5020  // for low numbers of procs, and method #2 (keyed API call) for higher
5021  // numbers of procs.
5022  //
5023  if ( __kmp_adjust_gtid_mode ) {
5024  if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
5025  if ( TCR_4(__kmp_gtid_mode) != 2) {
5026  TCW_4(__kmp_gtid_mode, 2);
5027  }
5028  }
5029  else {
5030  if (TCR_4(__kmp_gtid_mode) != 1 ) {
5031  TCW_4(__kmp_gtid_mode, 1);
5032  }
5033  }
5034  }
5035 
5036 #ifdef KMP_ADJUST_BLOCKTIME
5037  /* Adjust blocktime back to zero if necessary */
5038  /* Middle initialization might not have ocurred yet */
5039  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5040  if ( __kmp_nth > __kmp_avail_proc ) {
5041  __kmp_zero_bt = TRUE;
5042  }
5043  }
5044 #endif /* KMP_ADJUST_BLOCKTIME */
5045 
5046  /* actually fork it and create the new worker thread */
5047  KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
5048  __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
5049  KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
5050 
5051 
5052  KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
5053  KMP_MB();
5054  return new_thr;
5055 }
5056 
5057 /*
5058  * reinitialize team for reuse.
5059  *
5060  * The hot team code calls this case at every fork barrier, so EPCC barrier
5061  * test are extremely sensitive to changes in it, esp. writes to the team
5062  * struct, which cause a cache invalidation in all threads.
5063  *
5064  * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
5065  */
5066 static void
5067 __kmp_reinitialize_team( kmp_team_t *team,
5068 #if OMP_30_ENABLED
5069  kmp_internal_control_t *new_icvs, ident_t *loc
5070 #else
5071  int new_set_nproc, int new_set_dynamic, int new_set_nested,
5072  int new_set_blocktime, int new_bt_intervals, int new_bt_set
5073 #endif
5074  ) {
5075  KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
5076  team->t.t_threads[0], team ) );
5077 #if OMP_30_ENABLED
5078  KMP_DEBUG_ASSERT( team && new_icvs);
5079  KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
5080  team->t.t_ident = loc;
5081 #else
5082  KMP_DEBUG_ASSERT( team && new_set_nproc );
5083 #endif // OMP_30_ENABLED
5084 
5085  team->t.t_id = KMP_GEN_TEAM_ID();
5086 
5087  // Copy ICVs to the master thread's implicit taskdata
5088 #if OMP_30_ENABLED
5089  load_icvs(new_icvs);
5090  __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5091  store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
5092  sync_icvs();
5093 # else
5094  team -> t.t_set_nproc[0] = new_set_nproc;
5095  team -> t.t_set_dynamic[0] = new_set_dynamic;
5096  team -> t.t_set_nested[0] = new_set_nested;
5097  team -> t.t_set_blocktime[0] = new_set_blocktime;
5098  team -> t.t_set_bt_intervals[0] = new_bt_intervals;
5099  team -> t.t_set_bt_set[0] = new_bt_set;
5100 # endif // OMP_30_ENABLED
5101 
5102  KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
5103  team->t.t_threads[0], team ) );
5104 }
5105 
5106 static void
5107 __kmp_setup_icv_copy(kmp_team_t * team, int new_nproc,
5108 #if OMP_30_ENABLED
5109  kmp_internal_control_t * new_icvs,
5110  ident_t * loc
5111 #else
5112  int new_set_nproc, int new_set_dynamic, int new_set_nested,
5113  int new_set_blocktime, int new_bt_intervals, int new_bt_set
5114 #endif // OMP_30_ENABLED
5115  )
5116 {
5117  int f;
5118 
5119 #if OMP_30_ENABLED
5120  KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
5121  KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
5122 #else
5123  KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
5124 #endif // OMP_30_ENABLED
5125 
5126  // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
5127  // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
5128 #if KMP_BARRIER_ICV_PULL
5129  // Copy the ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where all of the
5130  // worker threads can access them and make their own copies after the barrier.
5131  load_icvs(new_icvs);
5132  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point
5133  store_icvs(&team->t.t_threads[0]->th.th_fixed_icvs, new_icvs);
5134  sync_icvs();
5135  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
5136 
5137 #elif KMP_BARRIER_ICV_PUSH
5138  // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
5139  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
5140 
5141 #else
5142  // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
5143 # if OMP_30_ENABLED
5144  load_icvs(new_icvs);
5145 # endif // OMP_30_ENABLED
5146  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point
5147  for(f=1 ; f<new_nproc ; f++) { // skip the master thread
5148 # if OMP_30_ENABLED
5149  // TODO: GEH - pass in better source location info since usually NULL here
5150  KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
5151  f, team->t.t_threads[f], team ) );
5152  __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
5153  store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
5154  KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
5155  f, team->t.t_threads[f], team ) );
5156 # else
5157  team -> t.t_set_nproc[f] = new_set_nproc;
5158  team -> t.t_set_dynamic[f] = new_set_dynamic;
5159  team -> t.t_set_nested[f] = new_set_nested;
5160  team -> t.t_set_blocktime[f] = new_set_blocktime;
5161  team -> t.t_set_bt_intervals[f] = new_bt_intervals;
5162  team -> t.t_set_bt_set[f] = new_bt_set;
5163 # endif // OMP_30_ENABLED
5164  }
5165 # if OMP_30_ENABLED
5166  sync_icvs();
5167 # endif // OMP_30_ENABLED
5168 #endif // KMP_BARRIER_ICV_PULL
5169 }
5170 
5171 /* initialize the team data structure
5172  * this assumes the t_threads and t_max_nproc are already set
5173  * also, we don't touch the arguments */
5174 static void
5175 __kmp_initialize_team(
5176  kmp_team_t * team,
5177  int new_nproc,
5178  #if OMP_30_ENABLED
5179  kmp_internal_control_t * new_icvs,
5180  ident_t * loc
5181  #else
5182  int new_set_nproc, int new_set_dynamic, int new_set_nested,
5183  int new_set_blocktime, int new_bt_intervals, int new_bt_set
5184  #endif // OMP_30_ENABLED
5185 ) {
5186  KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
5187 
5188  /* verify */
5189  KMP_DEBUG_ASSERT( team );
5190  KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
5191  KMP_DEBUG_ASSERT( team->t.t_threads );
5192  KMP_MB();
5193 
5194  team -> t.t_master_tid = 0; /* not needed */
5195  /* team -> t.t_master_bar; not needed */
5196  team -> t.t_serialized = new_nproc > 1 ? 0 : 1;
5197  team -> t.t_nproc = new_nproc;
5198 
5199  /* team -> t.t_parent = NULL; TODO not needed & would mess up hot team */
5200  team -> t.t_next_pool = NULL;
5201  /* memset( team -> t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
5202 
5203  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
5204  team -> t.t_invoke = NULL; /* not needed */
5205 
5206 #if OMP_30_ENABLED
5207  // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
5208  team -> t.t_sched = new_icvs->sched;
5209 #endif // OMP_30_ENABLED
5210 
5211 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
5212  team -> t.t_fp_control_saved = FALSE; /* not needed */
5213  team -> t.t_x87_fpu_control_word = 0; /* not needed */
5214  team -> t.t_mxcsr = 0; /* not needed */
5215 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
5216 
5217  team -> t.t_construct = 0;
5218  __kmp_init_lock( & team -> t.t_single_lock );
5219 
5220  team -> t.t_ordered .dt.t_value = 0;
5221  team -> t.t_master_active = FALSE;
5222 
5223  memset( & team -> t.t_taskq, '\0', sizeof( kmp_taskq_t ));
5224 
5225 #ifdef KMP_DEBUG
5226  team -> t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
5227 #endif
5228  team -> t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
5229 
5230  team -> t.t_control_stack_top = NULL;
5231 
5232  __kmp_reinitialize_team( team,
5233 #if OMP_30_ENABLED
5234  new_icvs, loc
5235 #else
5236  new_set_nproc, new_set_dynamic, new_set_nested,
5237  new_set_blocktime, new_bt_intervals, new_bt_set
5238 #endif // OMP_30_ENABLED
5239  );
5240 
5241 
5242  KMP_MB();
5243  KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
5244 }
5245 
5246 #if KMP_OS_LINUX
5247 /* Sets full mask for thread and returns old mask, no changes to structures. */
5248 static void
5249 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
5250 {
5251  if ( KMP_AFFINITY_CAPABLE() ) {
5252  int status;
5253  if ( old_mask != NULL ) {
5254  status = __kmp_get_system_affinity( old_mask, TRUE );
5255  int error = errno;
5256  if ( status != 0 ) {
5257  __kmp_msg(
5258  kmp_ms_fatal,
5259  KMP_MSG( ChangeThreadAffMaskError ),
5260  KMP_ERR( error ),
5261  __kmp_msg_null
5262  );
5263  }
5264  }
5265  __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
5266  }
5267 }
5268 #endif
5269 
5270 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
5271 
5272 //
5273 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
5274 // It calculats the worker + master thread's partition based upon the parent
5275 // thread's partition, and binds each worker to a thread in thier partition.
5276 // The master thread's partition should already include its current binding.
5277 //
5278 static void
5279 __kmp_partition_places( kmp_team_t *team )
5280 {
5281  //
5282  // Copy the master thread's place partion to the team struct
5283  //
5284  kmp_info_t *master_th = team->t.t_threads[0];
5285  KMP_DEBUG_ASSERT( master_th != NULL );
5286  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
5287  int first_place = master_th->th.th_first_place;
5288  int last_place = master_th->th.th_last_place;
5289  int masters_place = master_th->th.th_current_place;
5290  team->t.t_first_place = first_place;
5291  team->t.t_last_place = last_place;
5292 
5293  KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
5294  proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
5295  masters_place, first_place, last_place ) );
5296 
5297  switch ( proc_bind ) {
5298 
5299  case proc_bind_default:
5300  //
5301  // serial teams might have the proc_bind policy set to
5302  // proc_bind_default. It doesn't matter, as we don't
5303  // rebind the master thread for any proc_bind policy.
5304  //
5305  KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
5306  break;
5307 
5308  case proc_bind_master:
5309  {
5310  int f;
5311  int n_th = team->t.t_nproc;
5312  for ( f = 1; f < n_th; f++ ) {
5313  kmp_info_t *th = team->t.t_threads[f];
5314  KMP_DEBUG_ASSERT( th != NULL );
5315  th->th.th_first_place = first_place;
5316  th->th.th_last_place = last_place;
5317  th->th.th_new_place = masters_place;
5318 
5319  KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5320  __kmp_gtid_from_thread( team->t.t_threads[f] ),
5321  team->t.t_id, f, masters_place, first_place, last_place ) );
5322  }
5323  }
5324  break;
5325 
5326  case proc_bind_close:
5327  {
5328  int f;
5329  int n_th = team->t.t_nproc;
5330  int n_places;
5331  if ( first_place <= last_place ) {
5332  n_places = last_place - first_place + 1;
5333  }
5334  else {
5335  n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5336  }
5337  if ( n_th <= n_places ) {
5338  int place = masters_place;
5339  for ( f = 1; f < n_th; f++ ) {
5340  kmp_info_t *th = team->t.t_threads[f];
5341  KMP_DEBUG_ASSERT( th != NULL );
5342 
5343  if ( place == last_place ) {
5344  place = first_place;
5345  }
5346  else if ( place == __kmp_affinity_num_masks - 1) {
5347  place = 0;
5348  }
5349  else {
5350  place++;
5351  }
5352  th->th.th_first_place = first_place;
5353  th->th.th_last_place = last_place;
5354  th->th.th_new_place = place;
5355 
5356  KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5357  __kmp_gtid_from_thread( team->t.t_threads[f] ),
5358  team->t.t_id, f, place, first_place, last_place ) );
5359  }
5360  }
5361  else {
5362  int S, rem, gap, s_count;
5363  S = n_th / n_places;
5364  s_count = 0;
5365  rem = n_th - ( S * n_places );
5366  gap = rem > 0 ? n_places/rem : n_places;
5367  int place = masters_place;
5368  int gap_ct = gap;
5369  for ( f = 0; f < n_th; f++ ) {
5370  kmp_info_t *th = team->t.t_threads[f];
5371  KMP_DEBUG_ASSERT( th != NULL );
5372 
5373  th->th.th_first_place = first_place;
5374  th->th.th_last_place = last_place;
5375  th->th.th_new_place = place;
5376  s_count++;
5377 
5378  if ( (s_count == S) && rem && (gap_ct == gap) ) {
5379  // do nothing, add an extra thread to place on next iteration
5380  }
5381  else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5382  // we added an extra thread to this place; move to next place
5383  if ( place == last_place ) {
5384  place = first_place;
5385  }
5386  else if ( place == __kmp_affinity_num_masks - 1) {
5387  place = 0;
5388  }
5389  else {
5390  place++;
5391  }
5392  s_count = 0;
5393  gap_ct = 1;
5394  rem--;
5395  }
5396  else if (s_count == S) { // place full; don't add extra
5397  if ( place == last_place ) {
5398  place = first_place;
5399  }
5400  else if ( place == __kmp_affinity_num_masks - 1) {
5401  place = 0;
5402  }
5403  else {
5404  place++;
5405  }
5406  gap_ct++;
5407  s_count = 0;
5408  }
5409 
5410  KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5411  __kmp_gtid_from_thread( team->t.t_threads[f] ),
5412  team->t.t_id, f, th->th.th_new_place, first_place,
5413  last_place ) );
5414  }
5415  KMP_DEBUG_ASSERT( place == masters_place );
5416  }
5417  }
5418  break;
5419 
5420  case proc_bind_spread:
5421  {
5422  int f;
5423  int n_th = team->t.t_nproc;
5424  int n_places;
5425  if ( first_place <= last_place ) {
5426  n_places = last_place - first_place + 1;
5427  }
5428  else {
5429  n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5430  }
5431  if ( n_th <= n_places ) {
5432  int place = masters_place;
5433  int S = n_places/n_th;
5434  int s_count, rem, gap, gap_ct;
5435  rem = n_places - n_th*S;
5436  gap = rem ? n_th/rem : 1;
5437  gap_ct = gap;
5438  for ( f = 0; f < n_th; f++ ) {
5439  kmp_info_t *th = team->t.t_threads[f];
5440  KMP_DEBUG_ASSERT( th != NULL );
5441 
5442  th->th.th_first_place = place;
5443  th->th.th_new_place = place;
5444  s_count = 1;
5445  while (s_count < S) {
5446  if ( place == last_place ) {
5447  place = first_place;
5448  }
5449  else if ( place == __kmp_affinity_num_masks - 1) {
5450  place = 0;
5451  }
5452  else {
5453  place++;
5454  }
5455  s_count++;
5456  }
5457  if (rem && (gap_ct == gap)) {
5458  if ( place == last_place ) {
5459  place = first_place;
5460  }
5461  else if ( place == __kmp_affinity_num_masks - 1) {
5462  place = 0;
5463  }
5464  else {
5465  place++;
5466  }
5467  rem--;
5468  gap_ct = 0;
5469  }
5470  th->th.th_last_place = place;
5471  gap_ct++;
5472 
5473  if ( place == last_place ) {
5474  place = first_place;
5475  }
5476  else if ( place == __kmp_affinity_num_masks - 1) {
5477  place = 0;
5478  }
5479  else {
5480  place++;
5481  }
5482 
5483  KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5484  __kmp_gtid_from_thread( team->t.t_threads[f] ),
5485  team->t.t_id, f, th->th.th_new_place,
5486  th->th.th_first_place, th->th.th_last_place ) );
5487  }
5488  KMP_DEBUG_ASSERT( place == masters_place );
5489  }
5490  else {
5491  int S, rem, gap, s_count;
5492  S = n_th / n_places;
5493  s_count = 0;
5494  rem = n_th - ( S * n_places );
5495  gap = rem > 0 ? n_places/rem : n_places;
5496  int place = masters_place;
5497  int gap_ct = gap;
5498  for ( f = 0; f < n_th; f++ ) {
5499  kmp_info_t *th = team->t.t_threads[f];
5500  KMP_DEBUG_ASSERT( th != NULL );
5501 
5502  th->th.th_first_place = place;
5503  th->th.th_last_place = place;
5504  th->th.th_new_place = place;
5505  s_count++;
5506 
5507  if ( (s_count == S) && rem && (gap_ct == gap) ) {
5508  // do nothing, add an extra thread to place on next iteration
5509  }
5510  else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5511  // we added an extra thread to this place; move on to next place
5512  if ( place == last_place ) {
5513  place = first_place;
5514  }
5515  else if ( place == __kmp_affinity_num_masks - 1) {
5516  place = 0;
5517  }
5518  else {
5519  place++;
5520  }
5521  s_count = 0;
5522  gap_ct = 1;
5523  rem--;
5524  }
5525  else if (s_count == S) { // place is full; don't add extra thread
5526  if ( place == last_place ) {
5527  place = first_place;
5528  }
5529  else if ( place == __kmp_affinity_num_masks - 1) {
5530  place = 0;
5531  }
5532  else {
5533  place++;
5534  }
5535  gap_ct++;
5536  s_count = 0;
5537  }
5538 
5539  KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5540  __kmp_gtid_from_thread( team->t.t_threads[f] ),
5541  team->t.t_id, f, th->th.th_new_place,
5542  th->th.th_first_place, th->th.th_last_place) );
5543  }
5544  KMP_DEBUG_ASSERT( place == masters_place );
5545  }
5546  }
5547  break;
5548 
5549  default:
5550  break;
5551  }
5552 
5553  KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
5554 }
5555 
5556 #endif /* OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX) */
5557 
5558 /* allocate a new team data structure to use. take one off of the free pool if available */
5559 kmp_team_t *
5560 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
5561 #if OMP_40_ENABLED
5562  kmp_proc_bind_t new_proc_bind,
5563 #endif
5564 #if OMP_30_ENABLED
5565  kmp_internal_control_t *new_icvs,
5566 #else
5567  int new_set_nproc, int new_set_dynamic, int new_set_nested,
5568  int new_set_blocktime, int new_bt_intervals, int new_bt_set,
5569 #endif
5570  int argc )
5571 {
5572  int f;
5573  kmp_team_t *team;
5574  char *ptr;
5575  size_t size;
5576 
5577  KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
5578  KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
5579  KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
5580  KMP_MB();
5581 
5582  //
5583  // optimization to use a "hot" team for the top level,
5584  // as it is usually the same
5585  //
5586  if ( ! root->r.r_active && new_nproc > 1 ) {
5587 
5588  KMP_DEBUG_ASSERT( new_nproc == max_nproc );
5589 
5590  team = root -> r.r_hot_team;
5591 
5592 #if OMP_30_ENABLED && KMP_DEBUG
5593  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5594  KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n",
5595  team -> t.t_task_team ));
5596  }
5597 #endif
5598 
5599  /* has the number of threads changed? */
5600  if( team -> t.t_nproc > new_nproc ) {
5601  KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
5602 
5603 #if KMP_MIC
5604  team -> t.t_size_changed = 1;
5605 #endif
5606 #if OMP_30_ENABLED
5607  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5608  kmp_task_team_t *task_team = team->t.t_task_team;
5609  if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
5610  //
5611  // Signal the worker threads (esp. the extra ones) to stop
5612  // looking for tasks while spin waiting. The task teams
5613  // are reference counted and will be deallocated by the
5614  // last worker thread.
5615  //
5616  KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5617  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5618  KMP_MB();
5619 
5620  KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5621  &team->t.t_task_team ) );
5622  team->t.t_task_team = NULL;
5623  }
5624  else {
5625  KMP_DEBUG_ASSERT( task_team == NULL );
5626  }
5627  }
5628 #endif // OMP_30_ENABLED
5629 
5630  /* release the extra threads we don't need any more */
5631  for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
5632  KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5633  __kmp_free_thread( team->t.t_threads[ f ] );
5634  team -> t.t_threads[ f ] = NULL;
5635  }
5636 
5637  team -> t.t_nproc = new_nproc;
5638 #if OMP_30_ENABLED
5639  // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
5640  team -> t.t_sched = new_icvs->sched;
5641 #endif
5642  __kmp_reinitialize_team( team,
5643 #if OMP_30_ENABLED
5644  new_icvs, root->r.r_uber_thread->th.th_ident
5645 #else
5646  new_set_nproc, new_set_dynamic, new_set_nested,
5647  new_set_blocktime, new_bt_intervals, new_bt_set
5648 #endif // OMP_30_ENABLED
5649  );
5650 
5651 
5652 #if OMP_30_ENABLED
5653  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5654  kmp_task_team_t *task_team = team->t.t_task_team;
5655  if ( task_team != NULL ) {
5656  KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5657  task_team->tt.tt_nproc = new_nproc;
5658  task_team->tt.tt_unfinished_threads = new_nproc;
5659  task_team->tt.tt_ref_ct = new_nproc - 1;
5660  }
5661  }
5662 #endif
5663 
5664  /* update the remaining threads */
5665  for( f = 0 ; f < new_nproc ; f++ ) {
5666  team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
5667  }
5668 
5669 #if OMP_30_ENABLED
5670  // restore the current task state of the master thread: should be the implicit task
5671  KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
5672  0, team->t.t_threads[0], team ) );
5673 
5674  __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5675 #endif
5676 
5677 #ifdef KMP_DEBUG
5678  for ( f = 0; f < team->t.t_nproc; f++ ) {
5679  KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5680  team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5681  }
5682 #endif
5683 
5684 #if OMP_40_ENABLED
5685  team->t.t_proc_bind = new_proc_bind;
5686 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5687  __kmp_partition_places( team );
5688 # endif
5689 #endif
5690 
5691  }
5692  else if ( team -> t.t_nproc < new_nproc ) {
5693 #if KMP_OS_LINUX
5694  kmp_affin_mask_t *old_mask;
5695  if ( KMP_AFFINITY_CAPABLE() ) {
5696  KMP_CPU_ALLOC(old_mask);
5697  }
5698 #endif
5699 
5700  KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
5701 
5702 #if KMP_MIC
5703  team -> t.t_size_changed = 1;
5704 #endif
5705 
5706 
5707  if(team -> t.t_max_nproc < new_nproc) {
5708  /* reallocate larger arrays */
5709  __kmp_reallocate_team_arrays(team, new_nproc);
5710  __kmp_reinitialize_team( team,
5711 #if OMP_30_ENABLED
5712  new_icvs, NULL
5713 #else
5714  new_set_nproc, new_set_dynamic, new_set_nested,
5715  new_set_blocktime, new_bt_intervals, new_bt_set
5716 #endif // OMP_30_ENABLED
5717  );
5718  }
5719 
5720 #if KMP_OS_LINUX
5721  /* Temporarily set full mask for master thread before
5722  creation of workers. The reason is that workers inherit
5723  the affinity from master, so if a lot of workers are
5724  created on the single core quickly, they don't get
5725  a chance to set their own affinity for a long time.
5726  */
5727  __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5728 #endif
5729 
5730  /* allocate new threads for the hot team */
5731  for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5732  kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5733  KMP_DEBUG_ASSERT( new_worker );
5734  team->t.t_threads[ f ] = new_worker;
5735  new_worker->th.th_team_nproc = team->t.t_nproc;
5736 
5737  KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
5738  team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5739  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5740  team->t.t_bar[bs_plain_barrier].b_arrived ) );
5741 
5742  { // Initialize barrier data for new threads.
5743  int b;
5744  kmp_balign_t * balign = new_worker->th.th_bar;
5745  for ( b = 0; b < bp_last_bar; ++ b ) {
5746  balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5747  }
5748  }
5749  }
5750 
5751 #if KMP_OS_LINUX
5752  if ( KMP_AFFINITY_CAPABLE() ) {
5753  /* Restore initial master thread's affinity mask */
5754  __kmp_set_system_affinity( old_mask, TRUE );
5755  KMP_CPU_FREE(old_mask);
5756  }
5757 #endif
5758 
5759  /* make sure everyone is syncronized */
5760  __kmp_initialize_team( team, new_nproc,
5761 #if OMP_30_ENABLED
5762  new_icvs,
5763  root->r.r_uber_thread->th.th_ident
5764 #else
5765  new_set_nproc, new_set_dynamic, new_set_nested,
5766  new_set_blocktime, new_bt_intervals, new_bt_set
5767 #endif
5768  );
5769 
5770 #if OMP_30_ENABLED
5771  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5772  kmp_task_team_t *task_team = team->t.t_task_team;
5773  if ( task_team != NULL ) {
5774  KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5775  task_team->tt.tt_nproc = new_nproc;
5776  task_team->tt.tt_unfinished_threads = new_nproc;
5777  task_team->tt.tt_ref_ct = new_nproc - 1;
5778  }
5779  }
5780 #endif
5781 
5782  /* reinitialize the old threads */
5783  for( f = 0 ; f < team->t.t_nproc ; f++ )
5784  __kmp_initialize_info( team->t.t_threads[ f ], team, f,
5785  __kmp_gtid_from_tid( f, team ) );
5786 #ifdef KMP_DEBUG
5787  for ( f = 0; f < team->t.t_nproc; ++ f ) {
5788  KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5789  team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5790  }
5791 #endif
5792 
5793 #if OMP_40_ENABLED
5794  team->t.t_proc_bind = new_proc_bind;
5795 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5796  __kmp_partition_places( team );
5797 # endif
5798 #endif
5799 
5800  }
5801  else {
5802  KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
5803 #if KMP_MIC
5804  // This case can mean that omp_set_num_threads() was called and the hot team size
5805  // was already reduced, so we check the special flag
5806  if ( team -> t.t_size_changed == -1 ) {
5807  team -> t.t_size_changed = 1;
5808  } else {
5809  team -> t.t_size_changed = 0;
5810  }
5811 #endif
5812 
5813 #if OMP_30_ENABLED
5814  // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
5815  team -> t.t_sched = new_icvs->sched;
5816 #endif
5817 
5818  __kmp_reinitialize_team( team,
5819 #if OMP_30_ENABLED
5820  new_icvs, root->r.r_uber_thread->th.th_ident
5821 #else
5822  new_set_nproc, new_set_dynamic, new_set_nested,
5823  new_set_blocktime, new_bt_intervals, new_bt_set
5824 #endif // OMP_30_ENABLED
5825  );
5826 
5827 #if OMP_30_ENABLED
5828  KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
5829  0, team->t.t_threads[0], team ) );
5830  __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5831 #endif
5832 
5833 #if OMP_40_ENABLED
5834 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
5835  if ( team->t.t_proc_bind == new_proc_bind ) {
5836  KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
5837  team->t.t_id, new_proc_bind, team->t.t_first_place,
5838  team->t.t_last_place ) );
5839  }
5840  else {
5841  team->t.t_proc_bind = new_proc_bind;
5842  __kmp_partition_places( team );
5843  }
5844 # else
5845  if ( team->t.t_proc_bind != new_proc_bind ) {
5846  team->t.t_proc_bind = new_proc_bind;
5847  }
5848 # endif /* (KMP_OS_WINDOWS || KMP_OS_LINUX) */
5849 #endif /* OMP_40_ENABLED */
5850  }
5851 
5852  /* reallocate space for arguments if necessary */
5853  __kmp_alloc_argv_entries( argc, team, TRUE );
5854  team -> t.t_argc = argc;
5855  //
5856  // The hot team re-uses the previous task team,
5857  // if untouched during the previous release->gather phase.
5858  //
5859 
5860  KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5861 
5862 #if OMP_30_ENABLED && KMP_DEBUG
5863  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5864  KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n",
5865  team -> t.t_task_team ));
5866  }
5867 #endif
5868 
5869  KMP_MB();
5870 
5871  return team;
5872  }
5873 
5874  /* next, let's try to take one from the team pool */
5875  KMP_MB();
5876  for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5877  {
5878  /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
5879  if ( team->t.t_max_nproc >= max_nproc ) {
5880  /* take this team from the team pool */
5881  __kmp_team_pool = team->t.t_next_pool;
5882 
5883  /* setup the team for fresh use */
5884  __kmp_initialize_team( team, new_nproc,
5885 #if OMP_30_ENABLED
5886  new_icvs,
5887  NULL // TODO: !!!
5888 #else
5889  new_set_nproc, new_set_dynamic, new_set_nested,
5890  new_set_blocktime, new_bt_intervals, new_bt_set
5891 #endif
5892  );
5893 
5894 #if OMP_30_ENABLED
5895  KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5896  &team->t.t_task_team ) );
5897  team -> t.t_task_team = NULL;
5898 #endif
5899 
5900  /* reallocate space for arguments if necessary */
5901  __kmp_alloc_argv_entries( argc, team, TRUE );
5902  team -> t.t_argc = argc;
5903 
5904  KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5905  team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5906  { // Initialize barrier data.
5907  int b;
5908  for ( b = 0; b < bs_last_barrier; ++ b) {
5909  team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5910  }
5911  }
5912 
5913 #if OMP_40_ENABLED
5914  team->t.t_proc_bind = new_proc_bind;
5915 #endif
5916 
5917  KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5918  KMP_MB();
5919 
5920  return team;
5921  }
5922 
5923  /* reap team if it is too small, then loop back and check the next one */
5924  /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */
5925  /* TODO: Use technique to find the right size hot-team, don't reap them */
5926  team = __kmp_reap_team( team );
5927  __kmp_team_pool = team;
5928  }
5929 
5930  /* nothing available in the pool, no matter, make a new team! */
5931  KMP_MB();
5932  team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5933 
5934  /* and set it up */
5935  team -> t.t_max_nproc = max_nproc;
5936  /* NOTE well, for some reason allocating one big buffer and dividing it
5937  * up seems to really hurt performance a lot on the P4, so, let's not use
5938  * this... */
5939  __kmp_allocate_team_arrays( team, max_nproc );
5940 
5941  KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5942  __kmp_initialize_team( team, new_nproc,
5943 #if OMP_30_ENABLED
5944  new_icvs,
5945  NULL // TODO: !!!
5946 #else
5947  new_set_nproc, new_set_dynamic, new_set_nested,
5948  new_set_blocktime, new_bt_intervals, new_bt_set
5949 #endif
5950  );
5951 
5952 #if OMP_30_ENABLED
5953  KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5954  &team->t.t_task_team ) );
5955  team -> t.t_task_team = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
5956 #endif
5957 
5958  if ( __kmp_storage_map ) {
5959  __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5960  }
5961 
5962  /* allocate space for arguments */
5963  __kmp_alloc_argv_entries( argc, team, FALSE );
5964  team -> t.t_argc = argc;
5965 
5966  KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5967  team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5968  { // Initialize barrier data.
5969  int b;
5970  for ( b = 0; b < bs_last_barrier; ++ b ) {
5971  team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5972  }
5973  }
5974 
5975 #if OMP_40_ENABLED
5976  team->t.t_proc_bind = new_proc_bind;
5977 #endif
5978 
5979  KMP_MB();
5980 
5981  KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5982 
5983  return team;
5984 }
5985 
5986 /* TODO implement hot-teams at all levels */
5987 /* TODO implement lazy thread release on demand (disband request) */
5988 
5989 /* free the team. return it to the team pool. release all the threads
5990  * associated with it */
5991 void
5992 __kmp_free_team( kmp_root_t *root, kmp_team_t *team )
5993 {
5994  int f;
5995  KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5996 
5997  /* verify state */
5998  KMP_DEBUG_ASSERT( root );
5999  KMP_DEBUG_ASSERT( team );
6000  KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
6001  KMP_DEBUG_ASSERT( team->t.t_threads );
6002 
6003  /* team is done working */
6004  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
6005  team -> t.t_copyin_counter = 0; // init counter for possible reuse
6006  // Do not reset pointer to parent team to NULL for hot teams.
6007 
6008  /* if we are a nested team, release our threads */
6009  if( team != root->r.r_hot_team ) {
6010 
6011 #if OMP_30_ENABLED
6012  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6013  kmp_task_team_t *task_team = team->t.t_task_team;
6014  if ( task_team != NULL ) {
6015  //
6016  // Signal the worker threads to stop looking for tasks while
6017  // spin waiting. The task teams are reference counted and will
6018  // be deallocated by the last worker thread via the thread's
6019  // pointer to the task team.
6020  //
6021  KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
6022  task_team ) );
6023  KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
6024  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
6025  KMP_MB();
6026  team->t.t_task_team = NULL;
6027  }
6028  }
6029 #endif /* OMP_30_ENABLED */
6030 
6031  // Reset pointer to parent team only for non-hot teams.
6032  team -> t.t_parent = NULL;
6033 
6034 
6035  /* free the worker threads */
6036  for ( f = 1; f < team->t.t_nproc; ++ f ) {
6037  KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
6038  __kmp_free_thread( team->t.t_threads[ f ] );
6039  team->t.t_threads[ f ] = NULL;
6040  }
6041 
6042 
6043  /* put the team back in the team pool */
6044  /* TODO limit size of team pool, call reap_team if pool too large */
6045  team -> t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
6046  __kmp_team_pool = (volatile kmp_team_t*) team;
6047  }
6048 
6049  KMP_MB();
6050 }
6051 
6052 
6053 /* reap the team. destroy it, reclaim all its resources and free its memory */
6054 kmp_team_t *
6055 __kmp_reap_team( kmp_team_t *team )
6056 {
6057  kmp_team_t *next_pool = team -> t.t_next_pool;
6058 
6059  KMP_DEBUG_ASSERT( team );
6060  KMP_DEBUG_ASSERT( team -> t.t_dispatch );
6061  KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
6062  KMP_DEBUG_ASSERT( team -> t.t_threads );
6063  #if OMP_30_ENABLED
6064  #else
6065  KMP_DEBUG_ASSERT( team -> t.t_set_nproc );
6066  #endif
6067  KMP_DEBUG_ASSERT( team -> t.t_argv );
6068 
6069  /* TODO clean the threads that are a part of this? */
6070 
6071  /* free stuff */
6072 
6073  __kmp_free_team_arrays( team );
6074 #if (KMP_PERF_V106 == KMP_ON)
6075  if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
6076  __kmp_free( (void*) team -> t.t_argv );
6077 #else
6078  __kmp_free( (void*) team -> t.t_argv );
6079 #endif
6080  __kmp_free( team );
6081 
6082  KMP_MB();
6083  return next_pool;
6084 }
6085 
6086 //
6087 // Free the thread. Don't reap it, just place it on the pool of available
6088 // threads.
6089 //
6090 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
6091 // binding for the affinity mechanism to be useful.
6092 //
6093 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
6094 // However, we want to avoid a potential performance problem by always
6095 // scanning through the list to find the correct point at which to insert
6096 // the thread (potential N**2 behavior). To do this we keep track of the
6097 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
6098 // With single-level parallelism, threads will always be added to the tail
6099 // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
6100 // parallelism, all bets are off and we may need to scan through the entire
6101 // free list.
6102 //
6103 // This change also has a potentially large performance benefit, for some
6104 // applications. Previously, as threads were freed from the hot team, they
6105 // would be placed back on the free list in inverse order. If the hot team
6106 // grew back to it's original size, then the freed thread would be placed
6107 // back on the hot team in reverse order. This could cause bad cache
6108 // locality problems on programs where the size of the hot team regularly
6109 // grew and shrunk.
6110 //
6111 // Now, for single-level parallelism, the OMP tid is alway == gtid.
6112 //
6113 void
6114 __kmp_free_thread( kmp_info_t *this_th )
6115 {
6116  int gtid;
6117  kmp_info_t **scan;
6118 
6119  KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
6120  __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
6121 
6122  KMP_DEBUG_ASSERT( this_th );
6123 
6124 
6125  /* put thread back on the free pool */
6126  TCW_PTR(this_th->th.th_team, NULL);
6127  TCW_PTR(this_th->th.th_root, NULL);
6128  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
6129 
6130  //
6131  // If the __kmp_thread_pool_insert_pt is already past the new insert
6132  // point, then we need to re-scan the entire list.
6133  //
6134  gtid = this_th->th.th_info.ds.ds_gtid;
6135  if ( __kmp_thread_pool_insert_pt != NULL ) {
6136  KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
6137  if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
6138  __kmp_thread_pool_insert_pt = NULL;
6139  }
6140  }
6141 
6142  //
6143  // Scan down the list to find the place to insert the thread.
6144  // scan is the address of a link in the list, possibly the address of
6145  // __kmp_thread_pool itself.
6146  //
6147  // In the absence of nested parallism, the for loop will have 0 iterations.
6148  //
6149  if ( __kmp_thread_pool_insert_pt != NULL ) {
6150  scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
6151  }
6152  else {
6153  scan = (kmp_info_t **)&__kmp_thread_pool;
6154  }
6155  for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
6156  scan = &( (*scan)->th.th_next_pool ) );
6157 
6158  //
6159  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
6160  // to its address.
6161  //
6162  TCW_PTR(this_th->th.th_next_pool, *scan);
6163  __kmp_thread_pool_insert_pt = *scan = this_th;
6164  KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
6165  || ( this_th->th.th_info.ds.ds_gtid
6166  < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
6167  TCW_4(this_th->th.th_in_pool, TRUE);
6168  __kmp_thread_pool_nth++;
6169 
6170  TCW_4(__kmp_nth, __kmp_nth - 1);
6171 
6172 #ifdef KMP_ADJUST_BLOCKTIME
6173  /* Adjust blocktime back to user setting or default if necessary */
6174  /* Middle initialization might never have ocurred */
6175  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6176  KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6177  if ( __kmp_nth <= __kmp_avail_proc ) {
6178  __kmp_zero_bt = FALSE;
6179  }
6180  }
6181 #endif /* KMP_ADJUST_BLOCKTIME */
6182 
6183  KMP_MB();
6184 }
6185 
6186 void
6187 __kmp_join_barrier( int gtid )
6188 {
6189  register kmp_info_t *this_thr = __kmp_threads[ gtid ];
6190  register kmp_team_t *team;
6191  register kmp_uint nproc;
6192  kmp_info_t *master_thread;
6193  int tid;
6194  #ifdef KMP_DEBUG
6195  int team_id;
6196  #endif /* KMP_DEBUG */
6197 #if USE_ITT_BUILD
6198  void * itt_sync_obj = NULL;
6199  #if USE_ITT_NOTIFY
6200  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) // don't call routine without need
6201  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); // get object created at fork_barrier
6202  #endif
6203 #endif /* USE_ITT_BUILD */
6204 
6205  KMP_MB();
6206 
6207  /* get current info */
6208  team = this_thr -> th.th_team;
6209  /* nproc = team -> t.t_nproc;*/
6210  nproc = this_thr -> th.th_team_nproc;
6211  KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
6212  tid = __kmp_tid_from_gtid(gtid);
6213  #ifdef KMP_DEBUG
6214  team_id = team -> t.t_id;
6215  #endif /* KMP_DEBUG */
6216  /* master_thread = team -> t.t_threads[0];*/
6217  master_thread = this_thr -> th.th_team_master;
6218  #ifdef KMP_DEBUG
6219  if ( master_thread != team->t.t_threads[0] ) {
6220  __kmp_print_structure();
6221  }
6222  #endif /* KMP_DEBUG */
6223  KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
6224  KMP_MB();
6225 
6226  /* verify state */
6227  KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
6228  KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
6229  KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
6230  KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
6231 
6232  KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
6233  gtid, team_id, tid ));
6234 
6235  #if OMP_30_ENABLED
6236  if ( __kmp_tasking_mode == tskm_extra_barrier ) {
6237  __kmp_tasking_barrier( team, this_thr, gtid );
6238 
6239  KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n",
6240  gtid, team_id, tid ));
6241  }
6242  #ifdef KMP_DEBUG
6243  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6244  KA_TRACE( 20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
6245  __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
6246  this_thr->th.th_task_team ) );
6247  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
6248  }
6249  #endif /* KMP_DEBUG */
6250  #endif /* OMP_30_ENABLED */
6251 
6252  //
6253  // Copy the blocktime info to the thread, where __kmp_wait_sleep()
6254  // can access it when the team struct is not guaranteed to exist.
6255  //
6256  // Doing these loads causes a cache miss slows down EPCC parallel by 2x.
6257  // As a workaround, we do not perform the copy if blocktime=infinite,
6258  // since the values are not used by __kmp_wait_sleep() in that case.
6259  //
6260  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6261  #if OMP_30_ENABLED
6262  this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6263  this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
6264  #else
6265  this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6266  this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
6267  #endif // OMP_30_ENABLED
6268  }
6269 
6270 #if USE_ITT_BUILD
6271  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6272  __kmp_itt_barrier_starting( gtid, itt_sync_obj );
6273 #endif /* USE_ITT_BUILD */
6274 
6275  if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6276  __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6277  USE_ITT_BUILD_ARG( itt_sync_obj )
6278  );
6279  } else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6280  __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6281  USE_ITT_BUILD_ARG( itt_sync_obj )
6282  );
6283  } else {
6284  __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6285  USE_ITT_BUILD_ARG( itt_sync_obj )
6286  );
6287  }; // if
6288 
6289 #if USE_ITT_BUILD
6290  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6291  __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6292 #endif /* USE_ITT_BUILD */
6293 
6294  //
6295  // From this point on, the team data structure may be deallocated
6296  // at any time by the master thread - it is unsafe to reference it
6297  // in any of the worker threads.
6298  //
6299  // Any per-team data items that need to be referenced before the end
6300  // of the barrier should be moved to the kmp_task_team_t structs.
6301  //
6302 
6303  #if OMP_30_ENABLED
6304  if ( KMP_MASTER_TID( tid ) ) {
6305  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6306  // Master shouldn't call decrease_load(). // TODO: enable master threads.
6307  // Master should have th_may_decrease_load == 0. // TODO: enable master threads.
6308  __kmp_task_team_wait( this_thr, team
6309  USE_ITT_BUILD_ARG( itt_sync_obj )
6310  );
6311  }
6312 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6313  // Join barrier - report frame end
6314  if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
6315  kmp_uint64 tmp = __itt_get_timestamp();
6316  ident_t * loc = team->t.t_ident;
6317  switch( __kmp_forkjoin_frames_mode ) {
6318  case 1:
6319  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
6320  break;
6321  case 2:
6322  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
6323  break;
6324  case 3:
6325  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
6326  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
6327  break;
6328  }
6329  }
6330 #endif /* USE_ITT_BUILD */
6331  }
6332  #endif /* OMP_30_ENABLED */
6333 
6334  #if KMP_DEBUG
6335  if( KMP_MASTER_TID( tid )) {
6336  KA_TRACE( 15, ( "__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
6337  gtid, team_id, tid, nproc ));
6338  }
6339  #endif /* KMP_DEBUG */
6340 
6341  /* TODO now, mark worker threads as done so they may be disbanded */
6342 
6343  KMP_MB(); /* Flush all pending memory write invalidates. */
6344  KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n",
6345  gtid, team_id, tid ));
6346 }
6347 
6348 
6349 /* TODO release worker threads' fork barriers as we are ready instead of all at once */
6350 
6351 void
6352 __kmp_fork_barrier( int gtid, int tid )
6353 {
6354  kmp_info_t *this_thr = __kmp_threads[ gtid ];
6355  kmp_team_t *team = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
6356 #if USE_ITT_BUILD
6357  void * itt_sync_obj = NULL;
6358 #endif /* USE_ITT_BUILD */
6359 
6360  KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
6361  gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
6362 
6363  /* th_team pointer only valid for master thread here */
6364  if ( KMP_MASTER_TID( tid ) ) {
6365 
6366 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6367  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6368  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 1 ); // create itt barrier object
6369  //__kmp_itt_barrier_starting( gtid, itt_sync_obj ); // AC: no need to call prepare right before acquired
6370  __kmp_itt_barrier_middle( gtid, itt_sync_obj ); // call acquired / releasing
6371  }
6372 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
6373 
6374 #ifdef KMP_DEBUG
6375 
6376  register kmp_info_t **other_threads = team -> t.t_threads;
6377  register int i;
6378 
6379  /* verify state */
6380  KMP_MB();
6381 
6382  for( i = 1; i < team -> t.t_nproc ; i++ ) {
6383  KA_TRACE( 500, ( "__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
6384  "go == %u.\n",
6385  gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
6386  team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
6387  other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
6388 
6389  KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
6390  & ~(KMP_BARRIER_SLEEP_STATE) )
6391  == KMP_INIT_BARRIER_STATE );
6392  KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
6393 
6394  }
6395 #endif
6396 
6397 #if OMP_30_ENABLED
6398  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6399  __kmp_task_team_setup( this_thr, team );
6400  }
6401 #endif /* OMP_30_ENABLED */
6402 
6403  //
6404  // The master thread may have changed its blocktime between the
6405  // join barrier and the fork barrier.
6406  //
6407  // Copy the blocktime info to the thread, where __kmp_wait_sleep()
6408  // can access it when the team struct is not guaranteed to exist.
6409  //
6410  // See the note about the corresponding code in __kmp_join_barrier()
6411  // being performance-critical.
6412  //
6413  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6414 #if OMP_30_ENABLED
6415  this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6416  this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
6417 #else
6418  this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6419  this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
6420 #endif // OMP_30_ENABLED
6421  }
6422  } // master
6423 
6424  if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6425  __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6426  USE_ITT_BUILD_ARG( itt_sync_obj )
6427  );
6428  } else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6429  __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6430  USE_ITT_BUILD_ARG( itt_sync_obj )
6431  );
6432  } else {
6433  __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6434  USE_ITT_BUILD_ARG( itt_sync_obj )
6435  );
6436  }; // if
6437 
6438  //
6439  // early exit for reaping threads releasing forkjoin barrier
6440  //
6441  if ( TCR_4(__kmp_global.g.g_done) ) {
6442 
6443 #if OMP_30_ENABLED
6444  if ( this_thr->th.th_task_team != NULL ) {
6445  if ( KMP_MASTER_TID( tid ) ) {
6446  TCW_PTR(this_thr->th.th_task_team, NULL);
6447  }
6448  else {
6449  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6450  }
6451  }
6452 #endif /* OMP_30_ENABLED */
6453 
6454 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6455  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6456  if ( !KMP_MASTER_TID( tid ) ) {
6457  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6458  if ( itt_sync_obj )
6459  __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6460  }
6461  }
6462 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
6463  KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
6464  return;
6465  }
6466 
6467  //
6468  // We can now assume that a valid team structure has been allocated
6469  // by the master and propagated to all worker threads.
6470  //
6471  // The current thread, however, may not be part of the team, so we can't
6472  // blindly assume that the team pointer is non-null.
6473  //
6474  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
6475  KMP_DEBUG_ASSERT( team != NULL );
6476  tid = __kmp_tid_from_gtid( gtid );
6477 
6478 #if OMP_30_ENABLED
6479 
6480 # if KMP_BARRIER_ICV_PULL
6481  // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
6482  // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
6483  // We cannot modify __kmp_fork_call() to look at the fixed ICVs in the master's thread struct, because it is
6484  // not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed.
6485  if (! KMP_MASTER_TID( tid ) ) { // master thread already has ICVs
6486  // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
6487  KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid ));
6488  load_icvs(&team->t.t_threads[0]->th.th_fixed_icvs);
6489  __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE );
6490  store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_threads[0]->th.th_fixed_icvs);
6491  sync_icvs();
6492  }
6493 # endif // KMP_BARRIER_ICV_PULL
6494 
6495  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6496  __kmp_task_team_sync( this_thr, team );
6497  }
6498 
6499 #endif /* OMP_30_ENABLED */
6500 
6501 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6502  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
6503  if ( proc_bind == proc_bind_intel ) {
6504 #endif
6505 #if KMP_MIC
6506  //
6507  // Call dynamic affinity settings
6508  //
6509  if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
6510  __kmp_balanced_affinity( tid, team->t.t_nproc );
6511  }
6512 #endif
6513 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6514  }
6515  else if ( ( proc_bind != proc_bind_false )
6516  && ( proc_bind != proc_bind_disabled )) {
6517  if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
6518  KA_TRACE( 100, ( "__kmp_fork_barrier: T#%d already in correct place %d\n",
6519  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
6520  }
6521  else {
6522  __kmp_affinity_set_place( gtid );
6523  }
6524  }
6525 #endif
6526 
6527 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6528  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6529  if ( !KMP_MASTER_TID( tid ) ) {
6530  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); // get correct barrier object
6531  __kmp_itt_barrier_finished( gtid, itt_sync_obj ); // workers call acquired
6532  } // (prepare called inside barrier_release)
6533  }
6534 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
6535  KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
6536  gtid, team->t.t_id, tid ));
6537 }
6538 
6539 
6540 /* ------------------------------------------------------------------------ */
6541 /* ------------------------------------------------------------------------ */
6542 
6543 void *
6544 __kmp_launch_thread( kmp_info_t *this_thr )
6545 {
6546  int gtid = this_thr->th.th_info.ds.ds_gtid;
6547 /* void *stack_data;*/
6548  kmp_team_t *(*volatile pteam);
6549 
6550  KMP_MB();
6551  KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
6552 
6553  if( __kmp_env_consistency_check ) {
6554  this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
6555  }
6556 
6557  /* This is the place where threads wait for work */
6558  while( ! TCR_4(__kmp_global.g.g_done) ) {
6559  KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
6560  KMP_MB();
6561 
6562  /* wait for work to do */
6563  KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
6564 
6565  /* No tid yet since not part of a team */
6566  __kmp_fork_barrier( gtid, KMP_GTID_DNE );
6567 
6568  pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
6569 
6570  /* have we been allocated? */
6571  if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
6572  /* we were just woken up, so run our new task */
6573  if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
6574  int rc;
6575  KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6576  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6577 
6578 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6579  if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
6580  __kmp_clear_x87_fpu_status_word();
6581  __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
6582  __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
6583  }
6584 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
6585 
6586  rc = (*pteam) -> t.t_invoke( gtid );
6587  KMP_ASSERT( rc );
6588 
6589  KMP_MB();
6590  KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6591  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6592  }
6593 
6594  /* join barrier after parallel region */
6595  __kmp_join_barrier( gtid );
6596  }
6597  }
6598  TCR_SYNC_PTR(__kmp_global.g.g_done);
6599 
6600 #if OMP_30_ENABLED
6601  if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
6602  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6603  }
6604 #endif /* OMP_30_ENABLED */
6605 
6606  /* run the destructors for the threadprivate data for this thread */
6607  __kmp_common_destroy_gtid( gtid );
6608 
6609  KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
6610  KMP_MB();
6611  return this_thr;
6612 }
6613 
6614 /* ------------------------------------------------------------------------ */
6615 /* ------------------------------------------------------------------------ */
6616 
6617 
6618 
6619 void
6620 __kmp_internal_end_dest( void *specific_gtid )
6621 {
6622  #if KMP_COMPILER_ICC
6623  #pragma warning( push )
6624  #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
6625  #endif
6626  // Make sure no significant bits are lost
6627  int gtid = (kmp_intptr_t)specific_gtid - 1;
6628  #if KMP_COMPILER_ICC
6629  #pragma warning( pop )
6630  #endif
6631 
6632  KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6633  /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage
6634  * this is because 0 is reserved for the nothing-stored case */
6635 
6636  /* josh: One reason for setting the gtid specific data even when it is being
6637  destroyed by pthread is to allow gtid lookup through thread specific data
6638  (__kmp_gtid_get_specific). Some of the code, especially stat code,
6639  that gets executed in the call to __kmp_internal_end_thread, actually
6640  gets the gtid through the thread specific data. Setting it here seems
6641  rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
6642  to run smoothly.
6643  todo: get rid of this after we remove the dependence on
6644  __kmp_gtid_get_specific
6645  */
6646  if(gtid >= 0 && KMP_UBER_GTID(gtid))
6647  __kmp_gtid_set_specific( gtid );
6648  #ifdef KMP_TDATA_GTID
6649  __kmp_gtid = gtid;
6650  #endif
6651  __kmp_internal_end_thread( gtid );
6652 }
6653 
6654 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
6655 
6656 // 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work
6657 // perfectly, but in real libiomp5.so I have no evidence it is ever called. However, -fini linker
6658 // option in makefile.mk works fine.
6659 
6660 __attribute__(( destructor ))
6661 void
6662 __kmp_internal_end_dtor( void )
6663 {
6664  __kmp_internal_end_atexit();
6665 }
6666 
6667 void
6668 __kmp_internal_end_fini( void )
6669 {
6670  __kmp_internal_end_atexit();
6671 }
6672 
6673 #endif
6674 
6675 /* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
6676 void
6677 __kmp_internal_end_atexit( void )
6678 {
6679  KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
6680  /* [Windows]
6681  josh: ideally, we want to completely shutdown the library in this atexit handler, but
6682  stat code that depends on thread specific data for gtid fails because that data becomes
6683  unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
6684  instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the
6685  stat code and use __kmp_internal_end_library to cleanly shutdown the library.
6686 
6687 // TODO: Can some of this comment about GVS be removed?
6688  I suspect that the offending stat code is executed when the calling thread tries to
6689  clean up a dead root thread's data structures, resulting in GVS code trying to close
6690  the GVS structures for that thread, but since the stat code uses
6691  __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
6692  cleaning up itself instead of another thread, it gets confused. This happens because
6693  allowing a thread to unregister and cleanup another thread is a recent modification for
6694  addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a
6695  thread may end up trying to unregister another thread only if thread death does not
6696  trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread
6697  specific data destructor function to detect thread death. For Windows dynamic, there
6698  is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the
6699  workaround is applicable only for Windows static stat library.
6700  */
6701  __kmp_internal_end_library( -1 );
6702  #if KMP_OS_WINDOWS
6703  __kmp_close_console();
6704  #endif
6705 }
6706 
6707 static void
6708 __kmp_reap_thread(
6709  kmp_info_t * thread,
6710  int is_root
6711 ) {
6712 
6713  // It is assumed __kmp_forkjoin_lock is aquired.
6714 
6715  int gtid;
6716 
6717  KMP_DEBUG_ASSERT( thread != NULL );
6718 
6719  gtid = thread->th.th_info.ds.ds_gtid;
6720 
6721  if ( ! is_root ) {
6722 
6723  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6724  /* Assume the threads are at the fork barrier here */
6725  KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
6726  /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
6727  __kmp_release(
6728  thread,
6729  &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
6730  kmp_release_fence
6731  );
6732  }; // if
6733 
6734 
6735  // Terminate OS thread.
6736  __kmp_reap_worker( thread );
6737 
6738  //
6739  // The thread was killed asynchronously. If it was actively
6740  // spinning in the in the thread pool, decrement the global count.
6741  //
6742  // There is a small timing hole here - if the worker thread was
6743  // just waking up after sleeping in the pool, had reset it's
6744  // th_active_in_pool flag but not decremented the global counter
6745  // __kmp_thread_pool_active_nth yet, then the global counter
6746  // might not get updated.
6747  //
6748  // Currently, this can only happen as the library is unloaded,
6749  // so there are no harmful side effects.
6750  //
6751  if ( thread->th.th_active_in_pool ) {
6752  thread->th.th_active_in_pool = FALSE;
6753  KMP_TEST_THEN_DEC32(
6754  (kmp_int32 *) &__kmp_thread_pool_active_nth );
6755  KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
6756  }
6757 
6758  // Decrement # of [worker] threads in the pool.
6759  KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
6760  --__kmp_thread_pool_nth;
6761  }; // if
6762 
6763  // Free the fast memory for tasking
6764  #if USE_FAST_MEMORY
6765  __kmp_free_fast_memory( thread );
6766  #endif /* USE_FAST_MEMORY */
6767 
6768  __kmp_suspend_uninitialize_thread( thread );
6769 
6770  KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
6771  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6772 
6773  -- __kmp_all_nth;
6774  // __kmp_nth was decremented when thread is added to the pool.
6775 
6776 #ifdef KMP_ADJUST_BLOCKTIME
6777  /* Adjust blocktime back to user setting or default if necessary */
6778  /* Middle initialization might never have ocurred */
6779  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6780  KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6781  if ( __kmp_nth <= __kmp_avail_proc ) {
6782  __kmp_zero_bt = FALSE;
6783  }
6784  }
6785 #endif /* KMP_ADJUST_BLOCKTIME */
6786 
6787  /* free the memory being used */
6788  if( __kmp_env_consistency_check ) {
6789  if ( thread->th.th_cons ) {
6790  __kmp_free_cons_stack( thread->th.th_cons );
6791  thread->th.th_cons = NULL;
6792  }; // if
6793  }
6794 
6795  if ( thread->th.th_pri_common != NULL ) {
6796  __kmp_free( thread->th.th_pri_common );
6797  thread->th.th_pri_common = NULL;
6798  }; // if
6799 
6800  #if KMP_USE_BGET
6801  if ( thread->th.th_local.bget_data != NULL ) {
6802  __kmp_finalize_bget( thread );
6803  }; // if
6804  #endif
6805 
6806 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
6807  if ( thread->th.th_affin_mask != NULL ) {
6808  KMP_CPU_FREE( thread->th.th_affin_mask );
6809  thread->th.th_affin_mask = NULL;
6810  }; // if
6811 #endif /* (KMP_OS_WINDOWS || KMP_OS_LINUX) */
6812 
6813  __kmp_reap_team( thread->th.th_serial_team );
6814  thread->th.th_serial_team = NULL;
6815  __kmp_free( thread );
6816 
6817  KMP_MB();
6818 
6819 } // __kmp_reap_thread
6820 
6821 static void
6822 __kmp_internal_end(void)
6823 {
6824  int i;
6825 
6826  /* First, unregister the library */
6827  __kmp_unregister_library();
6828 
6829  #if KMP_OS_WINDOWS
6830  /* In Win static library, we can't tell when a root actually dies, so we
6831  reclaim the data structures for any root threads that have died but not
6832  unregistered themselves, in order to shut down cleanly.
6833  In Win dynamic library we also can't tell when a thread dies.
6834  */
6835  __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
6836  #endif
6837 
6838  for( i=0 ; i<__kmp_threads_capacity ; i++ )
6839  if( __kmp_root[i] )
6840  if( __kmp_root[i] -> r.r_active )
6841  break;
6842  KMP_MB(); /* Flush all pending memory write invalidates. */
6843  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6844 
6845  if ( i < __kmp_threads_capacity ) {
6846  // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
6847  KMP_MB(); /* Flush all pending memory write invalidates. */
6848 
6849  //
6850  // Need to check that monitor was initialized before reaping it.
6851  // If we are called form __kmp_atfork_child (which sets
6852  // __kmp_init_parallel = 0), then __kmp_monitor will appear to
6853  // contain valid data, but it is only valid in the parent process,
6854  // not the child.
6855  //
6856  // One of the possible fixes for CQ138434 / CQ140126
6857  // (used in 20091103_dreamworks patch)
6858  //
6859  // New behavior (201008): instead of keying off of the flag
6860  // __kmp_init_parallel, the monitor thread creation is keyed off
6861  // of the new flag __kmp_init_monitor.
6862  //
6863  __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6864  if ( TCR_4( __kmp_init_monitor ) ) {
6865  __kmp_reap_monitor( & __kmp_monitor );
6866  TCW_4( __kmp_init_monitor, 0 );
6867  }
6868  __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6869  KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
6870  } else {
6871  /* TODO move this to cleanup code */
6872  #ifdef KMP_DEBUG
6873  /* make sure that everything has properly ended */
6874  for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6875  if( __kmp_root[i] ) {
6876  KMP_ASSERT( ! KMP_UBER_GTID( i ) );
6877  KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
6878  }
6879  }
6880  #endif
6881 
6882  KMP_MB();
6883 
6884  // Reap the worker threads.
6885  // This is valid for now, but be careful if threads are reaped sooner.
6886  while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool.
6887  // Get the next thread from the pool.
6888  kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
6889  __kmp_thread_pool = thread->th.th_next_pool;
6890  // Reap it.
6891  thread->th.th_next_pool = NULL;
6892  thread->th.th_in_pool = FALSE;
6893  __kmp_reap_thread( thread, 0 );
6894  }; // while
6895  __kmp_thread_pool_insert_pt = NULL;
6896 
6897  // Reap teams.
6898  while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool.
6899  // Get the next team from the pool.
6900  kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
6901  __kmp_team_pool = team->t.t_next_pool;
6902  // Reap it.
6903  team->t.t_next_pool = NULL;
6904  __kmp_reap_team( team );
6905  }; // while
6906 
6907  #if OMP_30_ENABLED
6908  __kmp_reap_task_teams( );
6909  #endif /* OMP_30_ENABLED */
6910 
6911  for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6912  // TBD: Add some checking...
6913  // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
6914  }
6915 
6916  /* Make sure all threadprivate destructors get run by joining with all worker
6917  threads before resetting this flag */
6918  TCW_SYNC_4(__kmp_init_common, FALSE);
6919 
6920  KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
6921  KMP_MB();
6922 
6923  //
6924  // See note above: One of the possible fixes for CQ138434 / CQ140126
6925  //
6926  // FIXME: push both code fragments down and CSE them?
6927  // push them into __kmp_cleanup() ?
6928  //
6929  __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6930  if ( TCR_4( __kmp_init_monitor ) ) {
6931  __kmp_reap_monitor( & __kmp_monitor );
6932  TCW_4( __kmp_init_monitor, 0 );
6933  }
6934  __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6935  KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
6936 
6937  } /* else !__kmp_global.t_active */
6938  TCW_4(__kmp_init_gtid, FALSE);
6939  KMP_MB(); /* Flush all pending memory write invalidates. */
6940 
6941 
6942  __kmp_cleanup();
6943 }
6944 
6945 void
6946 __kmp_internal_end_library( int gtid_req )
6947 {
6948  int i;
6949 
6950  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
6951  /* this shouldn't be a race condition because __kmp_internal_end() is the
6952  * only place to clear __kmp_serial_init */
6953  /* we'll check this later too, after we get the lock */
6954  // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant,
6955  // because the next check will work in any case.
6956  if( __kmp_global.g.g_abort ) {
6957  KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
6958  /* TODO abort? */
6959  return;
6960  }
6961  if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6962  KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
6963  return;
6964  }
6965 
6966 
6967  KMP_MB(); /* Flush all pending memory write invalidates. */
6968 
6969  /* find out who we are and what we should do */
6970  {
6971  int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6972  KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
6973  if( gtid == KMP_GTID_SHUTDOWN ) {
6974  KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
6975  return;
6976  } else if( gtid == KMP_GTID_MONITOR ) {
6977  KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
6978  return;
6979  } else if( gtid == KMP_GTID_DNE ) {
6980  KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
6981  /* we don't know who we are, but we may still shutdown the library */
6982  } else if( KMP_UBER_GTID( gtid )) {
6983  /* unregister ourselves as an uber thread. gtid is no longer valid */
6984  if( __kmp_root[gtid] -> r.r_active ) {
6985  __kmp_global.g.g_abort = -1;
6986  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6987  KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
6988  return;
6989  } else {
6990  KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
6991  __kmp_unregister_root_current_thread( gtid );
6992  }
6993  } else {
6994  /* worker threads may call this function through the atexit handler, if they call exit() */
6995  /* For now, skip the usual subsequent processing and just dump the debug buffer.
6996  TODO: do a thorough shutdown instead
6997  */
6998  #ifdef DUMP_DEBUG_ON_EXIT
6999  if ( __kmp_debug_buf )
7000  __kmp_dump_debug_buffer( );
7001  #endif
7002  return;
7003  }
7004  }
7005  /* synchronize the termination process */
7006  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7007 
7008  /* have we already finished */
7009  if( __kmp_global.g.g_abort ) {
7010  KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
7011  /* TODO abort? */
7012  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7013  return;
7014  }
7015  if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7016  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7017  return;
7018  }
7019 
7020  /* We need this lock to enforce mutex between this reading of
7021  __kmp_threads_capacity and the writing by __kmp_register_root.
7022  Alternatively, we can use a counter of roots that is
7023  atomically updated by __kmp_get_global_thread_id_reg,
7024  __kmp_do_serial_initialize and __kmp_internal_end_*.
7025  */
7026  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7027 
7028  /* now we can safely conduct the actual termination */
7029  __kmp_internal_end();
7030 
7031  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7032  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7033 
7034  KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
7035 
7036  #ifdef DUMP_DEBUG_ON_EXIT
7037  if ( __kmp_debug_buf )
7038  __kmp_dump_debug_buffer();
7039  #endif
7040 
7041  #if KMP_OS_WINDOWS
7042  __kmp_close_console();
7043  #endif
7044 
7045  __kmp_fini_allocator();
7046 
7047 } // __kmp_internal_end_library
7048 
7049 void
7050 __kmp_internal_end_thread( int gtid_req )
7051 {
7052  int i;
7053 
7054  /* if we have already cleaned up, don't try again, it wouldn't be pretty */
7055  /* this shouldn't be a race condition because __kmp_internal_end() is the
7056  * only place to clear __kmp_serial_init */
7057  /* we'll check this later too, after we get the lock */
7058  // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
7059  // because the next check will work in any case.
7060  if( __kmp_global.g.g_abort ) {
7061  KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
7062  /* TODO abort? */
7063  return;
7064  }
7065  if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7066  KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
7067  return;
7068  }
7069 
7070  KMP_MB(); /* Flush all pending memory write invalidates. */
7071 
7072  /* find out who we are and what we should do */
7073  {
7074  int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
7075  KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
7076  if( gtid == KMP_GTID_SHUTDOWN ) {
7077  KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
7078  return;
7079  } else if( gtid == KMP_GTID_MONITOR ) {
7080  KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
7081  return;
7082  } else if( gtid == KMP_GTID_DNE ) {
7083  KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
7084  return;
7085  /* we don't know who we are */
7086  } else if( KMP_UBER_GTID( gtid )) {
7087  /* unregister ourselves as an uber thread. gtid is no longer valid */
7088  if( __kmp_root[gtid] -> r.r_active ) {
7089  __kmp_global.g.g_abort = -1;
7090  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
7091  KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
7092  return;
7093  } else {
7094  KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
7095  __kmp_unregister_root_current_thread( gtid );
7096  }
7097  } else {
7098  /* just a worker thread, let's leave */
7099  KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
7100 
7101  #if OMP_30_ENABLED
7102  if ( gtid >= 0 ) {
7103  kmp_info_t *this_thr = __kmp_threads[ gtid ];
7104  if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
7105  __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
7106  }
7107  }
7108  #endif /* OMP_30_ENABLED */
7109 
7110  KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
7111  return;
7112  }
7113  }
7114  #if defined GUIDEDLL_EXPORTS
7115  // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread,
7116  // because we will better shutdown later in the library destructor.
7117  // The reason of this change is performance problem when non-openmp thread
7118  // in a loop forks and joins many openmp threads. We can save a lot of time
7119  // keeping worker threads alive until the program shutdown.
7120  // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
7121  // Windows(DPD200287443) that occurs when using critical sections from foreign threads.
7122  KA_TRACE( 10, ("__kmp_internal_end_thread: exiting\n") );
7123  return;
7124  #endif
7125  /* synchronize the termination process */
7126  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7127 
7128  /* have we already finished */
7129  if( __kmp_global.g.g_abort ) {
7130  KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
7131  /* TODO abort? */
7132  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7133  return;
7134  }
7135  if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7136  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7137  return;
7138  }
7139 
7140  /* We need this lock to enforce mutex between this reading of
7141  __kmp_threads_capacity and the writing by __kmp_register_root.
7142  Alternatively, we can use a counter of roots that is
7143  atomically updated by __kmp_get_global_thread_id_reg,
7144  __kmp_do_serial_initialize and __kmp_internal_end_*.
7145  */
7146 
7147  /* should we finish the run-time? are all siblings done? */
7148  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7149 
7150  for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
7151  if ( KMP_UBER_GTID( i ) ) {
7152  KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
7153  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7154  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7155  return;
7156  };
7157  }
7158 
7159  /* now we can safely conduct the actual termination */
7160 
7161  __kmp_internal_end();
7162 
7163  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7164  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7165 
7166  KA_TRACE( 10, ("__kmp_internal_end_thread: exit\n" ) );
7167 
7168  #ifdef DUMP_DEBUG_ON_EXIT
7169  if ( __kmp_debug_buf )
7170  __kmp_dump_debug_buffer();
7171  #endif
7172 } // __kmp_internal_end_thread
7173 
7174 // -------------------------------------------------------------------------------------------------
7175 // Library registration stuff.
7176 
7177 static long __kmp_registration_flag = 0;
7178  // Random value used to indicate library initialization.
7179 static char * __kmp_registration_str = NULL;
7180  // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
7181 
7182 
7183 static inline
7184 char *
7185 __kmp_reg_status_name() {
7186  /*
7187  On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
7188  If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
7189  the name of registered_lib_env env var can not be found, because the name will contain different pid.
7190  */
7191  return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
7192 } // __kmp_reg_status_get
7193 
7194 
7195 void
7196 __kmp_register_library_startup(
7197  void
7198 ) {
7199 
7200  char * name = __kmp_reg_status_name(); // Name of the environment variable.
7201  int done = 0;
7202  union {
7203  double dtime;
7204  long ltime;
7205  } time;
7206  #if KMP_OS_WINDOWS
7207  __kmp_initialize_system_tick();
7208  #endif
7209  __kmp_read_system_time( & time.dtime );
7210  __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
7211  __kmp_registration_str =
7212  __kmp_str_format(
7213  "%p-%lx-%s",
7214  & __kmp_registration_flag,
7215  __kmp_registration_flag,
7216  KMP_LIBRARY_FILE
7217  );
7218 
7219  KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
7220 
7221  while ( ! done ) {
7222 
7223  char * value = NULL; // Actual value of the environment variable.
7224 
7225  // Set environment variable, but do not overwrite if it is exist.
7226  __kmp_env_set( name, __kmp_registration_str, 0 );
7227  // Check the variable is written.
7228  value = __kmp_env_get( name );
7229  if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7230 
7231  done = 1; // Ok, environment variable set successfully, exit the loop.
7232 
7233  } else {
7234 
7235  // Oops. Write failed. Another copy of OpenMP RTL is in memory.
7236  // Check whether it alive or dead.
7237  int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
7238  char * tail = value;
7239  char * flag_addr_str = NULL;
7240  char * flag_val_str = NULL;
7241  char const * file_name = NULL;
7242  __kmp_str_split( tail, '-', & flag_addr_str, & tail );
7243  __kmp_str_split( tail, '-', & flag_val_str, & tail );
7244  file_name = tail;
7245  if ( tail != NULL ) {
7246  long * flag_addr = 0;
7247  long flag_val = 0;
7248  sscanf( flag_addr_str, "%p", & flag_addr );
7249  sscanf( flag_val_str, "%lx", & flag_val );
7250  if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
7251  // First, check whether environment-encoded address is mapped into addr space.
7252  // If so, dereference it to see if it still has the right value.
7253 
7254  if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
7255  neighbor = 1;
7256  } else {
7257  // If not, then we know the other copy of the library is no longer running.
7258  neighbor = 2;
7259  }; // if
7260  }; // if
7261  }; // if
7262  switch ( neighbor ) {
7263  case 0 : // Cannot parse environment variable -- neighbor status unknown.
7264  // Assume it is the incompatible format of future version of the library.
7265  // Assume the other library is alive.
7266  // WARN( ... ); // TODO: Issue a warning.
7267  file_name = "unknown library";
7268  // Attention! Falling to the next case. That's intentional.
7269  case 1 : { // Neighbor is alive.
7270  // Check it is allowed.
7271  char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
7272  if ( ! __kmp_str_match_true( duplicate_ok ) ) {
7273  // That's not allowed. Issue fatal error.
7274  __kmp_msg(
7275  kmp_ms_fatal,
7276  KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
7277  KMP_HNT( DuplicateLibrary ),
7278  __kmp_msg_null
7279  );
7280  }; // if
7281  KMP_INTERNAL_FREE( duplicate_ok );
7282  __kmp_duplicate_library_ok = 1;
7283  done = 1; // Exit the loop.
7284  } break;
7285  case 2 : { // Neighbor is dead.
7286  // Clear the variable and try to register library again.
7287  __kmp_env_unset( name );
7288  } break;
7289  default : {
7290  KMP_DEBUG_ASSERT( 0 );
7291  } break;
7292  }; // switch
7293 
7294  }; // if
7295  KMP_INTERNAL_FREE( (void *) value );
7296 
7297  }; // while
7298  KMP_INTERNAL_FREE( (void *) name );
7299 
7300 } // func __kmp_register_library_startup
7301 
7302 
7303 void
7304 __kmp_unregister_library( void ) {
7305 
7306  char * name = __kmp_reg_status_name();
7307  char * value = __kmp_env_get( name );
7308 
7309  KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
7310  KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
7311  if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7312  // Ok, this is our variable. Delete it.
7313  __kmp_env_unset( name );
7314  }; // if
7315 
7316  KMP_INTERNAL_FREE( __kmp_registration_str );
7317  KMP_INTERNAL_FREE( value );
7318  KMP_INTERNAL_FREE( name );
7319 
7320  __kmp_registration_flag = 0;
7321  __kmp_registration_str = NULL;
7322 
7323 } // __kmp_unregister_library
7324 
7325 
7326 // End of Library registration stuff.
7327 // -------------------------------------------------------------------------------------------------
7328 
7329 static void
7330 __kmp_do_serial_initialize( void )
7331 {
7332  int i, gtid;
7333  int size;
7334 
7335  KA_TRACE( 10, ("__kmp_serial_initialize: enter\n" ) );
7336 
7337  KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
7338  KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
7339  KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
7340  KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
7341  KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
7342 
7343  __kmp_validate_locks();
7344 
7345  /* Initialize internal memory allocator */
7346  __kmp_init_allocator();
7347 
7348  /* Register the library startup via an environment variable
7349  and check to see whether another copy of the library is already
7350  registered. */
7351 
7352  __kmp_register_library_startup( );
7353 
7354  /* TODO reinitialization of library */
7355  if( TCR_4(__kmp_global.g.g_done) ) {
7356  KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
7357  }
7358 
7359  __kmp_global.g.g_abort = 0;
7360  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7361 
7362  /* initialize the locks */
7363 #if KMP_USE_ADAPTIVE_LOCKS
7364 #if KMP_DEBUG_ADAPTIVE_LOCKS
7365  __kmp_init_speculative_stats();
7366 #endif
7367 #endif
7368  __kmp_init_lock( & __kmp_global_lock );
7369  __kmp_init_queuing_lock( & __kmp_dispatch_lock );
7370  __kmp_init_lock( & __kmp_debug_lock );
7371  __kmp_init_atomic_lock( & __kmp_atomic_lock );
7372  __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
7373  __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
7374  __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
7375  __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
7376  __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
7377  __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
7378  __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
7379  __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
7380  __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
7381  __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
7382  __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
7383  __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
7384  __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
7385  __kmp_init_bootstrap_lock( & __kmp_exit_lock );
7386  __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
7387  __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
7388 
7389  /* conduct initialization and initial setup of configuration */
7390 
7391  __kmp_runtime_initialize();
7392 
7393  // Some global variable initialization moved here from kmp_env_initialize()
7394 #ifdef KMP_DEBUG
7395  kmp_diag = 0;
7396 #endif
7397  __kmp_abort_delay = 0;
7398 
7399  // From __kmp_init_dflt_team_nth()
7400  /* assume the entire machine will be used */
7401  __kmp_dflt_team_nth_ub = __kmp_xproc;
7402  if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
7403  __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7404  }
7405  if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
7406  __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7407  }
7408  __kmp_max_nth = __kmp_sys_max_nth;
7409 
7410  // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
7411  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7412  __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7413  __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7414  // From "KMP_LIBRARY" part of __kmp_env_initialize()
7415  __kmp_library = library_throughput;
7416  // From KMP_SCHEDULE initialization
7417  __kmp_static = kmp_sch_static_balanced;
7418  // AC: do not use analytical here, because it is non-monotonous
7419  //__kmp_guided = kmp_sch_guided_iterative_chunked;
7420  #if OMP_30_ENABLED
7421  //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment
7422  #endif // OMP_30_ENABLED
7423  // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
7424  // control parts
7425  #if KMP_FAST_REDUCTION_BARRIER
7426  #define kmp_reduction_barrier_gather_bb ((int)1)
7427  #define kmp_reduction_barrier_release_bb ((int)1)
7428  #define kmp_reduction_barrier_gather_pat bp_hyper_bar
7429  #define kmp_reduction_barrier_release_pat bp_hyper_bar
7430  #endif // KMP_FAST_REDUCTION_BARRIER
7431  for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
7432  __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
7433  __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
7434  __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
7435  __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
7436  #if KMP_FAST_REDUCTION_BARRIER
7437  if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
7438  __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
7439  __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
7440  __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
7441  __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
7442  }
7443  #endif // KMP_FAST_REDUCTION_BARRIER
7444  }
7445  #if KMP_FAST_REDUCTION_BARRIER
7446  #undef kmp_reduction_barrier_release_pat
7447  #undef kmp_reduction_barrier_gather_pat
7448  #undef kmp_reduction_barrier_release_bb
7449  #undef kmp_reduction_barrier_gather_bb
7450  #endif // KMP_FAST_REDUCTION_BARRIER
7451  #if KMP_MIC
7452  // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
7453  __kmp_barrier_gather_branch_bits [ 0 ] = 3; // plane gather
7454  __kmp_barrier_release_branch_bits[ 1 ] = 1; // forkjoin release
7455  #endif
7456 
7457  // From KMP_CHECKS initialization
7458 #ifdef KMP_DEBUG
7459  __kmp_env_checks = TRUE; /* development versions have the extra checks */
7460 #else
7461  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
7462 #endif
7463 
7464  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
7465  __kmp_foreign_tp = TRUE;
7466 
7467  __kmp_global.g.g_dynamic = FALSE;
7468  __kmp_global.g.g_dynamic_mode = dynamic_default;
7469 
7470  __kmp_env_initialize( NULL );
7471  // Print all messages in message catalog for testing purposes.
7472  #ifdef KMP_DEBUG
7473  char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
7474  if ( __kmp_str_match_true( val ) ) {
7475  kmp_str_buf_t buffer;
7476  __kmp_str_buf_init( & buffer );
7477  __kmp_i18n_dump_catalog( & buffer );
7478  __kmp_printf( "%s", buffer.str );
7479  __kmp_str_buf_free( & buffer );
7480  }; // if
7481  __kmp_env_free( & val );
7482  #endif
7483 
7484  __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
7485  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
7486  __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7487 
7488 
7489  // If the library is shut down properly, both pools must be NULL. Just in case, set them
7490  // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
7491  KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
7492  KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
7493  KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
7494  __kmp_thread_pool = NULL;
7495  __kmp_thread_pool_insert_pt = NULL;
7496  __kmp_team_pool = NULL;
7497 
7498  /* Allocate all of the variable sized records */
7499  /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
7500  /* Since allocation is cache-aligned, just add extra padding at the end */
7501  size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
7502  __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
7503  __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
7504 
7505  /* init thread counts */
7506  KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
7507  KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination.
7508  __kmp_all_nth = 0;
7509  __kmp_nth = 0;
7510 
7511  /* setup the uber master thread and hierarchy */
7512  gtid = __kmp_register_root( TRUE );
7513  KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
7514  KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7515  KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
7516 
7517  KMP_MB(); /* Flush all pending memory write invalidates. */
7518 
7519  __kmp_common_initialize();
7520 
7521  #if KMP_OS_UNIX
7522  /* invoke the child fork handler */
7523  __kmp_register_atfork();
7524  #endif
7525 
7526  #if ! defined GUIDEDLL_EXPORTS
7527  {
7528  /* Invoke the exit handler when the program finishes, only for static library.
7529  For dynamic library, we already have _fini and DllMain.
7530  */
7531  int rc = atexit( __kmp_internal_end_atexit );
7532  if ( rc != 0 ) {
7533  __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
7534  }; // if
7535  }
7536  #endif
7537 
7538  #if KMP_HANDLE_SIGNALS
7539  #if KMP_OS_UNIX
7540  /* NOTE: make sure that this is called before the user installs
7541  * their own signal handlers so that the user handlers
7542  * are called first. this way they can return false,
7543  * not call our handler, avoid terminating the library,
7544  * and continue execution where they left off. */
7545  __kmp_install_signals( FALSE );
7546  #endif /* KMP_OS_UNIX */
7547  #if KMP_OS_WINDOWS
7548  __kmp_install_signals( TRUE );
7549  #endif /* KMP_OS_WINDOWS */
7550  #endif
7551 
7552  /* we have finished the serial initialization */
7553  __kmp_init_counter ++;
7554 
7555  __kmp_init_serial = TRUE;
7556 
7557  if (__kmp_settings) {
7558  __kmp_env_print();
7559  }
7560 
7561 #if OMP_40_ENABLED
7562  if (__kmp_display_env || __kmp_display_env_verbose) {
7563  __kmp_env_print_2();
7564  }
7565 #endif // OMP_40_ENABLED
7566 
7567  KMP_MB();
7568 
7569  KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
7570 }
7571 
7572 void
7573 __kmp_serial_initialize( void )
7574 {
7575  if ( __kmp_init_serial ) {
7576  return;
7577  }
7578  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7579  if ( __kmp_init_serial ) {
7580  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7581  return;
7582  }
7583  __kmp_do_serial_initialize();
7584  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7585 }
7586 
7587 static void
7588 __kmp_do_middle_initialize( void )
7589 {
7590  int i, j;
7591  int prev_dflt_team_nth;
7592 
7593  if( !__kmp_init_serial ) {
7594  __kmp_do_serial_initialize();
7595  }
7596 
7597  KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
7598 
7599  //
7600  // Save the previous value for the __kmp_dflt_team_nth so that
7601  // we can avoid some reinitialization if it hasn't changed.
7602  //
7603  prev_dflt_team_nth = __kmp_dflt_team_nth;
7604 
7605 #if KMP_OS_WINDOWS || KMP_OS_LINUX
7606  //
7607  // __kmp_affinity_initialize() will try to set __kmp_ncores to the
7608  // number of cores on the machine.
7609  //
7610  __kmp_affinity_initialize();
7611 
7612  //
7613  // Run through the __kmp_threads array and set the affinity mask
7614  // for each root thread that is currently registered with the RTL.
7615  //
7616  for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7617  if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
7618  __kmp_affinity_set_init_mask( i, TRUE );
7619  }
7620  }
7621 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
7622 
7623  KMP_ASSERT( __kmp_xproc > 0 );
7624  if ( __kmp_avail_proc == 0 ) {
7625  __kmp_avail_proc = __kmp_xproc;
7626  }
7627 
7628  // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
7629  j = 0;
7630  while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
7631  __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
7632  j++;
7633  }
7634 
7635  if ( __kmp_dflt_team_nth == 0 ) {
7636 #ifdef KMP_DFLT_NTH_CORES
7637  //
7638  // Default #threads = #cores
7639  //
7640  __kmp_dflt_team_nth = __kmp_ncores;
7641  KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
7642  __kmp_dflt_team_nth ) );
7643 #else
7644  //
7645  // Default #threads = #available OS procs
7646  //
7647  __kmp_dflt_team_nth = __kmp_avail_proc;
7648  KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
7649  __kmp_dflt_team_nth ) );
7650 #endif /* KMP_DFLT_NTH_CORES */
7651  }
7652 
7653  if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
7654  __kmp_dflt_team_nth = KMP_MIN_NTH;
7655  }
7656  if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
7657  __kmp_dflt_team_nth = __kmp_sys_max_nth;
7658  }
7659 
7660  //
7661  // There's no harm in continuing if the following check fails,
7662  // but it indicates an error in the previous logic.
7663  //
7664  KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
7665 
7666  if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
7667  //
7668  // Run through the __kmp_threads array and set the num threads icv
7669  // for each root thread that is currently registered with the RTL
7670  // (which has not already explicitly set its nthreads-var with a
7671  // call to omp_set_num_threads()).
7672  //
7673  for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7674  kmp_info_t *thread = __kmp_threads[ i ];
7675  if ( thread == NULL ) continue;
7676 #if OMP_30_ENABLED
7677  if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
7678 #else
7679  if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ] != 0 ) continue;
7680 #endif /* OMP_30_ENABLED */
7681 
7682  set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
7683  }
7684  }
7685  KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7686  __kmp_dflt_team_nth) );
7687 
7688 #ifdef KMP_ADJUST_BLOCKTIME
7689  /* Adjust blocktime to zero if necessary */
7690  /* now that __kmp_avail_proc is set */
7691  if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
7692  KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
7693  if ( __kmp_nth > __kmp_avail_proc ) {
7694  __kmp_zero_bt = TRUE;
7695  }
7696  }
7697 #endif /* KMP_ADJUST_BLOCKTIME */
7698 
7699  /* we have finished middle initialization */
7700  TCW_SYNC_4(__kmp_init_middle, TRUE);
7701 
7702  KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
7703 }
7704 
7705 void
7706 __kmp_middle_initialize( void )
7707 {
7708  if ( __kmp_init_middle ) {
7709  return;
7710  }
7711  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7712  if ( __kmp_init_middle ) {
7713  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7714  return;
7715  }
7716  __kmp_do_middle_initialize();
7717  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7718 }
7719 
7720 void
7721 __kmp_parallel_initialize( void )
7722 {
7723  int gtid = __kmp_entry_gtid(); // this might be a new root
7724 
7725  /* syncronize parallel initialization (for sibling) */
7726  if( TCR_4(__kmp_init_parallel) ) return;
7727  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7728  if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
7729 
7730  /* TODO reinitialization after we have already shut down */
7731  if( TCR_4(__kmp_global.g.g_done) ) {
7732  KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
7733  __kmp_infinite_loop();
7734  }
7735 
7736  /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
7737  would cause a deadlock. So we call __kmp_do_serial_initialize directly.
7738  */
7739  if( !__kmp_init_middle ) {
7740  __kmp_do_middle_initialize();
7741  }
7742 
7743  /* begin initialization */
7744  KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
7745  KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7746 
7747 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7748  //
7749  // Save the FP control regs.
7750  // Worker threads will set theirs to these values at thread startup.
7751  //
7752  __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
7753  __kmp_store_mxcsr( &__kmp_init_mxcsr );
7754  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7755 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
7756 
7757 #if KMP_OS_UNIX
7758 # if KMP_HANDLE_SIGNALS
7759  /* must be after __kmp_serial_initialize */
7760  __kmp_install_signals( TRUE );
7761 # endif
7762 #endif
7763 
7764  __kmp_suspend_initialize();
7765 
7766 # if defined(USE_LOAD_BALANCE)
7767  if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7768  __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7769  }
7770 #else
7771  if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7772  __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7773  }
7774 #endif
7775 
7776  if ( __kmp_version ) {
7777  __kmp_print_version_2();
7778  }
7779 
7780  /* we have finished parallel initialization */
7781  TCW_SYNC_4(__kmp_init_parallel, TRUE);
7782 
7783  KMP_MB();
7784  KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
7785 
7786  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7787 }
7788 
7789 
7790 /* ------------------------------------------------------------------------ */
7791 
7792 void
7793 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
7794  kmp_team_t *team )
7795 {
7796  kmp_disp_t *dispatch;
7797 
7798  KMP_MB();
7799 
7800  /* none of the threads have encountered any constructs, yet. */
7801  this_thr->th.th_local.this_construct = 0;
7802  this_thr->th.th_local.last_construct = 0;
7803 #if KMP_CACHE_MANAGE
7804  KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
7805 #endif /* KMP_CACHE_MANAGE */
7806  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7807  KMP_DEBUG_ASSERT( dispatch );
7808  KMP_DEBUG_ASSERT( team -> t.t_dispatch );
7809  //KMP_DEBUG_ASSERT( this_thr -> th.th_dispatch == &team -> t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
7810 
7811  dispatch -> th_disp_index = 0; /* reset the dispatch buffer counter */
7812 
7813  if( __kmp_env_consistency_check )
7814  __kmp_push_parallel( gtid, team->t.t_ident );
7815 
7816  KMP_MB(); /* Flush all pending memory write invalidates. */
7817 }
7818 
7819 void
7820 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
7821  kmp_team_t *team )
7822 {
7823  if( __kmp_env_consistency_check )
7824  __kmp_pop_parallel( gtid, team->t.t_ident );
7825 }
7826 
7827 int
7828 __kmp_invoke_task_func( int gtid )
7829 {
7830  int rc;
7831  int tid = __kmp_tid_from_gtid( gtid );
7832  kmp_info_t *this_thr = __kmp_threads[ gtid ];
7833  kmp_team_t *team = this_thr -> th.th_team;
7834 
7835  __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
7836 #if USE_ITT_BUILD
7837  if ( __itt_stack_caller_create_ptr ) {
7838  __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code
7839  }
7840 #endif /* USE_ITT_BUILD */
7841  rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
7842  gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv );
7843 
7844 #if USE_ITT_BUILD
7845  if ( __itt_stack_caller_create_ptr ) {
7846  __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code
7847  }
7848 #endif /* USE_ITT_BUILD */
7849  __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
7850 
7851  return rc;
7852 }
7853 
7854 #if OMP_40_ENABLED
7855 void
7856 __kmp_teams_master( microtask_t microtask, int gtid )
7857 {
7858  // This routine is called by all master threads in teams construct
7859  kmp_info_t *this_thr = __kmp_threads[ gtid ];
7860  kmp_team_t *team = this_thr -> th.th_team;
7861  ident_t *loc = team->t.t_ident;
7862 
7863 #if KMP_DEBUG
7864  int tid = __kmp_tid_from_gtid( gtid );
7865  KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
7866  gtid, tid, microtask) );
7867 #endif
7868 
7869  // Launch league of teams now, but not let workers execute
7870  // (they hang on fork barrier until next parallel)
7871  this_thr->th.th_set_nproc = this_thr->th.th_set_nth_teams;
7872  __kmp_fork_call( loc, gtid, TRUE,
7873  team->t.t_argc,
7874  microtask,
7875  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
7876  NULL );
7877  __kmp_join_call( loc, gtid, 1 ); // AC: last parameter "1" eliminates join barrier which won't work because
7878  // worker threads are in a fork barrier waiting for more parallel regions
7879 }
7880 
7881 int
7882 __kmp_invoke_teams_master( int gtid )
7883 {
7884  #if KMP_DEBUG
7885  if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
7886  KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
7887  #endif
7888 
7889  __kmp_teams_master( (microtask_t)__kmp_threads[gtid]->th.th_team_microtask, gtid );
7890 
7891  return 1;
7892 }
7893 #endif /* OMP_40_ENABLED */
7894 
7895 /* this sets the requested number of threads for the next parallel region
7896  * encountered by this team */
7897 /* since this should be enclosed in the forkjoin critical section it
7898  * should avoid race conditions with assymmetrical nested parallelism */
7899 
7900 void
7901 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
7902 {
7903  kmp_info_t *thr = __kmp_threads[gtid];
7904 
7905  if( num_threads > 0 )
7906  thr -> th.th_set_nproc = num_threads;
7907 }
7908 
7909 #if OMP_40_ENABLED
7910 
7911 /* this sets the requested number of teams for the teams region and/or
7912  * the number of threads for the next parallel region encountered */
7913 void
7914 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
7915 {
7916  kmp_info_t *thr = __kmp_threads[gtid];
7917  // The number of teams is the number of threads in the outer "parallel"
7918  if( num_teams > 0 ) {
7919  thr -> th.th_set_nproc = num_teams;
7920  } else {
7921  thr -> th.th_set_nproc = 1; // AC: default number of teams is 1;
7922  // TODO: should it be __kmp_ncores ?
7923  }
7924  // The number of threads is for inner parallel regions
7925  if( num_threads > 0 ) {
7926  thr -> th.th_set_nth_teams = num_threads;
7927  } else {
7928  if( !TCR_4(__kmp_init_middle) )
7929  __kmp_middle_initialize();
7930  thr -> th.th_set_nth_teams = __kmp_avail_proc / thr -> th.th_set_nproc;
7931  }
7932 }
7933 
7934 
7935 //
7936 // Set the proc_bind var to use in the following parallel region.
7937 //
7938 void
7939 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7940 {
7941  kmp_info_t *thr = __kmp_threads[gtid];
7942  thr -> th.th_set_proc_bind = proc_bind;
7943 }
7944 
7945 #endif /* OMP_40_ENABLED */
7946 
7947 /* Launch the worker threads into the microtask. */
7948 
7949 void
7950 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7951 {
7952  kmp_info_t *this_thr = __kmp_threads[gtid];
7953 
7954 #ifdef KMP_DEBUG
7955  int f;
7956 #endif /* KMP_DEBUG */
7957 
7958  KMP_DEBUG_ASSERT( team );
7959  KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7960  KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7961  KMP_MB(); /* Flush all pending memory write invalidates. */
7962 
7963  team -> t.t_construct = 0; /* no single directives seen yet */
7964  team -> t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
7965 
7966  /* Reset the identifiers on the dispatch buffer */
7967  KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
7968  if ( team->t.t_max_nproc > 1 ) {
7969  int i;
7970  for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7971  team -> t.t_disp_buffer[ i ].buffer_index = i;
7972  } else {
7973  team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
7974  }
7975 
7976  KMP_MB(); /* Flush all pending memory write invalidates. */
7977  KMP_ASSERT( this_thr -> th.th_team == team );
7978 
7979 #ifdef KMP_DEBUG
7980  for( f=0 ; f<team->t.t_nproc ; f++ ) {
7981  KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7982  team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7983  }
7984 #endif /* KMP_DEBUG */
7985 
7986  /* release the worker threads so they may begin working */
7987  __kmp_fork_barrier( gtid, 0 );
7988 }
7989 
7990 
7991 void
7992 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7993 {
7994  kmp_info_t *this_thr = __kmp_threads[gtid];
7995 
7996  KMP_DEBUG_ASSERT( team );
7997  KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7998  KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7999  KMP_MB(); /* Flush all pending memory write invalidates. */
8000 
8001  /* Join barrier after fork */
8002 
8003 #ifdef KMP_DEBUG
8004  if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
8005  __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
8006  __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
8007  gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
8008  __kmp_print_structure();
8009  }
8010  KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
8011  __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
8012 #endif /* KMP_DEBUG */
8013 
8014  __kmp_join_barrier( gtid ); /* wait for everyone */
8015 
8016  KMP_MB(); /* Flush all pending memory write invalidates. */
8017  KMP_ASSERT( this_thr -> th.th_team == team );
8018 }
8019 
8020 
8021 /* ------------------------------------------------------------------------ */
8022 /* ------------------------------------------------------------------------ */
8023 
8024 #ifdef USE_LOAD_BALANCE
8025 
8026 //
8027 // Return the worker threads actively spinning in the hot team, if we
8028 // are at the outermost level of parallelism. Otherwise, return 0.
8029 //
8030 static int
8031 __kmp_active_hot_team_nproc( kmp_root_t *root )
8032 {
8033  int i;
8034  int retval;
8035  kmp_team_t *hot_team;
8036 
8037  if ( root->r.r_active ) {
8038  return 0;
8039  }
8040  hot_team = root->r.r_hot_team;
8041  if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
8042  return hot_team->t.t_nproc - 1; // Don't count master thread
8043  }
8044 
8045  //
8046  // Skip the master thread - it is accounted for elsewhere.
8047  //
8048  retval = 0;
8049  for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
8050  if ( hot_team->t.t_threads[i]->th.th_active ) {
8051  retval++;
8052  }
8053  }
8054  return retval;
8055 }
8056 
8057 //
8058 // Perform an automatic adjustment to the number of
8059 // threads used by the next parallel region.
8060 //
8061 static int
8062 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
8063 {
8064  int retval;
8065  int pool_active;
8066  int hot_team_active;
8067  int team_curr_active;
8068  int system_active;
8069 
8070  KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
8071  root, set_nproc ) );
8072  KMP_DEBUG_ASSERT( root );
8073  #if OMP_30_ENABLED
8074  KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
8075  #else
8076  KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
8077  #endif
8078  KMP_DEBUG_ASSERT( set_nproc > 1 );
8079 
8080  if ( set_nproc == 1) {
8081  KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
8082  return 1;
8083  }
8084 
8085  //
8086  // Threads that are active in the thread pool, active in the hot team
8087  // for this particular root (if we are at the outer par level), and
8088  // the currently executing thread (to become the master) are available
8089  // to add to the new team, but are currently contributing to the system
8090  // load, and must be accounted for.
8091  //
8092  pool_active = TCR_4(__kmp_thread_pool_active_nth);
8093  hot_team_active = __kmp_active_hot_team_nproc( root );
8094  team_curr_active = pool_active + hot_team_active + 1;
8095 
8096  //
8097  // Check the system load.
8098  //
8099  system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
8100  KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
8101  system_active, pool_active, hot_team_active ) );
8102 
8103  if ( system_active < 0 ) {
8104  //
8105  // There was an error reading the necessary info from /proc,
8106  // so use the thread limit algorithm instead. Once we set
8107  // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
8108  // we shouldn't wind up getting back here.
8109  //
8110  __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8111  KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
8112 
8113  //
8114  // Make this call behave like the thread limit algorithm.
8115  //
8116  retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
8117  : root->r.r_hot_team->t.t_nproc);
8118  if ( retval > set_nproc ) {
8119  retval = set_nproc;
8120  }
8121  if ( retval < KMP_MIN_NTH ) {
8122  retval = KMP_MIN_NTH;
8123  }
8124 
8125  KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
8126  return retval;
8127  }
8128 
8129  //
8130  // There is a slight delay in the load balance algorithm in detecting
8131  // new running procs. The real system load at this instant should be
8132  // at least as large as the #active omp thread that are available to
8133  // add to the team.
8134  //
8135  if ( system_active < team_curr_active ) {
8136  system_active = team_curr_active;
8137  }
8138  retval = __kmp_avail_proc - system_active + team_curr_active;
8139  if ( retval > set_nproc ) {
8140  retval = set_nproc;
8141  }
8142  if ( retval < KMP_MIN_NTH ) {
8143  retval = KMP_MIN_NTH;
8144  }
8145 
8146  KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
8147  return retval;
8148 } // __kmp_load_balance_nproc()
8149 
8150 #endif /* USE_LOAD_BALANCE */
8151 
8152 
8153 /* ------------------------------------------------------------------------ */
8154 /* ------------------------------------------------------------------------ */
8155 
8156 /* NOTE: this is called with the __kmp_init_lock held */
8157 void
8158 __kmp_cleanup( void )
8159 {
8160  int f;
8161 
8162  KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
8163 
8164  if (TCR_4(__kmp_init_parallel)) {
8165 #if KMP_HANDLE_SIGNALS
8166  __kmp_remove_signals();
8167 #endif
8168  TCW_4(__kmp_init_parallel, FALSE);
8169  }
8170 
8171  if (TCR_4(__kmp_init_middle)) {
8172 #if KMP_OS_WINDOWS || KMP_OS_LINUX
8173  __kmp_affinity_uninitialize();
8174 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
8175  TCW_4(__kmp_init_middle, FALSE);
8176  }
8177 
8178  KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
8179 
8180  if (__kmp_init_serial) {
8181 
8182  __kmp_runtime_destroy();
8183 
8184  __kmp_init_serial = FALSE;
8185  }
8186 
8187  for ( f = 0; f < __kmp_threads_capacity; f++ ) {
8188  if ( __kmp_root[ f ] != NULL ) {
8189  __kmp_free( __kmp_root[ f ] );
8190  __kmp_root[ f ] = NULL;
8191  }
8192  }
8193  __kmp_free( __kmp_threads );
8194  // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
8195  // freeing __kmp_root.
8196  __kmp_threads = NULL;
8197  __kmp_root = NULL;
8198  __kmp_threads_capacity = 0;
8199 
8200  __kmp_cleanup_user_locks();
8201 
8202  #if KMP_OS_LINUX || KMP_OS_WINDOWS
8203  KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
8204  __kmp_cpuinfo_file = NULL;
8205  #endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */
8206 
8207  #if KMP_USE_ADAPTIVE_LOCKS
8208  #if KMP_DEBUG_ADAPTIVE_LOCKS
8209  __kmp_print_speculative_stats();
8210  #endif
8211  #endif
8212  KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
8213  __kmp_nested_nth.nth = NULL;
8214  __kmp_nested_nth.size = 0;
8215  __kmp_nested_nth.used = 0;
8216 
8217  __kmp_i18n_catclose();
8218 
8219  KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
8220 }
8221 
8222 /* ------------------------------------------------------------------------ */
8223 /* ------------------------------------------------------------------------ */
8224 
8225 int
8226 __kmp_ignore_mppbeg( void )
8227 {
8228  char *env;
8229 
8230  if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
8231  if (__kmp_str_match_false( env ))
8232  return FALSE;
8233  }
8234  // By default __kmpc_begin() is no-op.
8235  return TRUE;
8236 }
8237 
8238 int
8239 __kmp_ignore_mppend( void )
8240 {
8241  char *env;
8242 
8243  if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
8244  if (__kmp_str_match_false( env ))
8245  return FALSE;
8246  }
8247  // By default __kmpc_end() is no-op.
8248  return TRUE;
8249 }
8250 
8251 void
8252 __kmp_internal_begin( void )
8253 {
8254  int gtid;
8255  kmp_root_t *root;
8256 
8257  /* this is a very important step as it will register new sibling threads
8258  * and assign these new uber threads a new gtid */
8259  gtid = __kmp_entry_gtid();
8260  root = __kmp_threads[ gtid ] -> th.th_root;
8261  KMP_ASSERT( KMP_UBER_GTID( gtid ));
8262 
8263  if( root->r.r_begin ) return;
8264  __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
8265  if( root->r.r_begin ) {
8266  __kmp_release_lock( & root->r.r_begin_lock, gtid );
8267  return;
8268  }
8269 
8270  root -> r.r_begin = TRUE;
8271 
8272  __kmp_release_lock( & root->r.r_begin_lock, gtid );
8273 }
8274 
8275 
8276 /* ------------------------------------------------------------------------ */
8277 /* ------------------------------------------------------------------------ */
8278 
8279 void
8280 __kmp_user_set_library (enum library_type arg)
8281 {
8282  int gtid;
8283  kmp_root_t *root;
8284  kmp_info_t *thread;
8285 
8286  /* first, make sure we are initialized so we can get our gtid */
8287 
8288  gtid = __kmp_entry_gtid();
8289  thread = __kmp_threads[ gtid ];
8290 
8291  root = thread -> th.th_root;
8292 
8293  KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
8294  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
8295  KMP_WARNING( SetLibraryIncorrectCall );
8296  return;
8297  }
8298 
8299  switch ( arg ) {
8300  case library_serial :
8301  thread -> th.th_set_nproc = 0;
8302  set__nproc_p( thread, 1 );
8303  break;
8304  case library_turnaround :
8305  thread -> th.th_set_nproc = 0;
8306  set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
8307  break;
8308  case library_throughput :
8309  thread -> th.th_set_nproc = 0;
8310  set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
8311  break;
8312  default:
8313  KMP_FATAL( UnknownLibraryType, arg );
8314  }
8315 
8316  __kmp_aux_set_library ( arg );
8317 }
8318 
8319 void
8320 __kmp_aux_set_stacksize( size_t arg )
8321 {
8322  if (! __kmp_init_serial)
8323  __kmp_serial_initialize();
8324 
8325 #if KMP_OS_DARWIN
8326  if (arg & (0x1000 - 1)) {
8327  arg &= ~(0x1000 - 1);
8328  if(arg + 0x1000) /* check for overflow if we round up */
8329  arg += 0x1000;
8330  }
8331 #endif
8332  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
8333 
8334  /* only change the default stacksize before the first parallel region */
8335  if (! TCR_4(__kmp_init_parallel)) {
8336  size_t value = arg; /* argument is in bytes */
8337 
8338  if (value < __kmp_sys_min_stksize )
8339  value = __kmp_sys_min_stksize ;
8340  else if (value > KMP_MAX_STKSIZE)
8341  value = KMP_MAX_STKSIZE;
8342 
8343  __kmp_stksize = value;
8344 
8345  __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
8346  }
8347 
8348  __kmp_release_bootstrap_lock( &__kmp_initz_lock );
8349 }
8350 
8351 /* set the behaviour of the runtime library */
8352 /* TODO this can cause some odd behaviour with sibling parallelism... */
8353 void
8354 __kmp_aux_set_library (enum library_type arg)
8355 {
8356  __kmp_library = arg;
8357 
8358  switch ( __kmp_library ) {
8359  case library_serial :
8360  {
8361  KMP_INFORM( LibraryIsSerial );
8362  (void) __kmp_change_library( TRUE );
8363  }
8364  break;
8365  case library_turnaround :
8366  (void) __kmp_change_library( TRUE );
8367  break;
8368  case library_throughput :
8369  (void) __kmp_change_library( FALSE );
8370  break;
8371  default:
8372  KMP_FATAL( UnknownLibraryType, arg );
8373  }
8374 }
8375 
8376 /* ------------------------------------------------------------------------ */
8377 /* ------------------------------------------------------------------------ */
8378 
8379 void
8380 __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
8381 {
8382  int blocktime = arg; /* argument is in milliseconds */
8383  int bt_intervals;
8384  int bt_set;
8385 
8386  __kmp_save_internal_controls( thread );
8387 
8388  /* Normalize and set blocktime for the teams */
8389  if (blocktime < KMP_MIN_BLOCKTIME)
8390  blocktime = KMP_MIN_BLOCKTIME;
8391  else if (blocktime > KMP_MAX_BLOCKTIME)
8392  blocktime = KMP_MAX_BLOCKTIME;
8393 
8394  set__blocktime_team( thread -> th.th_team, tid, blocktime );
8395  set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );
8396 
8397  /* Calculate and set blocktime intervals for the teams */
8398  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8399 
8400  set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
8401  set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );
8402 
8403  /* Set whether blocktime has been set to "TRUE" */
8404  bt_set = TRUE;
8405 
8406  set__bt_set_team( thread -> th.th_team, tid, bt_set );
8407  set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );
8408  KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
8409  __kmp_gtid_from_tid(tid, thread->th.th_team),
8410  thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
8411 }
8412 
8413 void
8414 __kmp_aux_set_defaults(
8415  char const * str,
8416  int len
8417 ) {
8418  if ( ! __kmp_init_serial ) {
8419  __kmp_serial_initialize();
8420  };
8421  __kmp_env_initialize( str );
8422 
8423  if (__kmp_settings
8424 #if OMP_40_ENABLED
8425  || __kmp_display_env || __kmp_display_env_verbose
8426 #endif // OMP_40_ENABLED
8427  ) {
8428  __kmp_env_print();
8429  }
8430 } // __kmp_aux_set_defaults
8431 
8432 /* ------------------------------------------------------------------------ */
8433 
8434 /*
8435  * internal fast reduction routines
8436  */
8437 
8438 PACKED_REDUCTION_METHOD_T
8439 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
8440  kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8441  kmp_critical_name *lck )
8442 {
8443 
8444  // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
8445  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
8446  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
8447  // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
8448 
8449  PACKED_REDUCTION_METHOD_T retval;
8450 
8451  int team_size;
8452 
8453  KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 )
8454  KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 )
8455 
8456  #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
8457  #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
8458 
8459  retval = critical_reduce_block;
8460 
8461  team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower
8462 
8463  if( team_size == 1 ) {
8464 
8465  retval = empty_reduce_block;
8466 
8467  } else {
8468 
8469  int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8470  int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8471 
8472  #if KMP_ARCH_X86_64
8473 
8474  #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
8475  #if KMP_MIC
8476  #define REDUCTION_TEAMSIZE_CUTOFF 8
8477  #else // KMP_MIC
8478  #define REDUCTION_TEAMSIZE_CUTOFF 4
8479  #endif // KMP_MIC
8480  if( tree_available ) {
8481  if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
8482  if ( atomic_available ) {
8483  retval = atomic_reduce_block;
8484  }
8485  } else {
8486  retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8487  }
8488  } else if ( atomic_available ) {
8489  retval = atomic_reduce_block;
8490  }
8491  #else
8492  #error "Unknown or unsupported OS"
8493  #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
8494 
8495  #elif KMP_ARCH_X86 || KMP_ARCH_ARM
8496 
8497  #if KMP_OS_LINUX || KMP_OS_WINDOWS
8498 
8499  // basic tuning
8500 
8501  if( atomic_available ) {
8502  if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
8503  retval = atomic_reduce_block;
8504  }
8505  } // otherwise: use critical section
8506 
8507  #elif KMP_OS_DARWIN
8508 
8509  if( atomic_available && ( num_vars <= 3 ) ) {
8510  retval = atomic_reduce_block;
8511  } else if( tree_available ) {
8512  if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
8513  retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8514  }
8515  } // otherwise: use critical section
8516 
8517  #else
8518  #error "Unknown or unsupported OS"
8519  #endif
8520 
8521  #else
8522  #error "Unknown or unsupported architecture"
8523  #endif
8524 
8525  }
8526 
8527  // KMP_FORCE_REDUCTION
8528 
8529  if( __kmp_force_reduction_method != reduction_method_not_defined ) {
8530 
8531  PACKED_REDUCTION_METHOD_T forced_retval;
8532 
8533  int atomic_available, tree_available;
8534 
8535  switch( ( forced_retval = __kmp_force_reduction_method ) )
8536  {
8537  case critical_reduce_block:
8538  KMP_ASSERT( lck ); // lck should be != 0
8539  if( team_size <= 1 ) {
8540  forced_retval = empty_reduce_block;
8541  }
8542  break;
8543 
8544  case atomic_reduce_block:
8545  atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8546  KMP_ASSERT( atomic_available ); // atomic_available should be != 0
8547  break;
8548 
8549  case tree_reduce_block:
8550  tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8551  KMP_ASSERT( tree_available ); // tree_available should be != 0
8552  #if KMP_FAST_REDUCTION_BARRIER
8553  forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8554  #endif
8555  break;
8556 
8557  default:
8558  KMP_ASSERT( 0 ); // "unsupported method specified"
8559  }
8560 
8561  retval = forced_retval;
8562  }
8563 
8564  KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
8565 
8566  #undef FAST_REDUCTION_TREE_METHOD_GENERATED
8567  #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8568 
8569  return ( retval );
8570 }
8571 
8572 // this function is for testing set/get/determine reduce method
8573 kmp_int32
8574 __kmp_get_reduce_method( void ) {
8575  return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );
8576 }
8577 
8578 /* ------------------------------------------------------------------------ */
KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid)
Definition: kmp_csupport.c:671
Definition: kmp.h:200
KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid)
Definition: kmp_csupport.c:413
sched_type
Definition: kmp.h:302