Intel® OpenMP* Runtime Library
kmp_csupport.c
1 /*
2  * kmp_csupport.c -- kfront linkage support for OpenMP.
3  * $Revision: 42826 $
4  * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "omp.h" /* extern "C" declarations of user-visible routines */
38 #include "kmp.h"
39 #include "kmp_i18n.h"
40 #include "kmp_itt.h"
41 #include "kmp_error.h"
42 
43 #define MAX_MESSAGE 512
44 
45 /* ------------------------------------------------------------------------ */
46 /* ------------------------------------------------------------------------ */
47 
48 /* flags will be used in future, e.g., to implement */
49 /* openmp_strict library restrictions */
50 
60 void
61 __kmpc_begin(ident_t *loc, kmp_int32 flags)
62 {
63  // By default __kmp_ignore_mppbeg() returns TRUE.
64  if (__kmp_ignore_mppbeg() == FALSE) {
65  __kmp_internal_begin();
66 
67  KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
68  }
69 }
70 
78 void
79 __kmpc_end(ident_t *loc)
80 {
81  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
82  // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
83  // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
84  // will unregister this root (which can cause library shutdown).
85  if (__kmp_ignore_mppend() == FALSE) {
86  KC_TRACE( 10, ("__kmpc_end: called\n" ) );
87  KA_TRACE( 30, ("__kmpc_end\n" ));
88 
89  __kmp_internal_end_thread( -1 );
90  }
91 }
92 
112 kmp_int32
113 __kmpc_global_thread_num(ident_t *loc)
114 {
115  kmp_int32 gtid = __kmp_entry_gtid();
116 
117  KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
118 
119  return gtid;
120 }
121 
135 kmp_int32
136 __kmpc_global_num_threads(ident_t *loc)
137 {
138  KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
139 
140  return TCR_4(__kmp_nth);
141 }
142 
149 kmp_int32
150 __kmpc_bound_thread_num(ident_t *loc)
151 {
152  KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
153  return __kmp_tid_from_gtid( __kmp_entry_gtid() );
154 }
155 
161 kmp_int32
162 __kmpc_bound_num_threads(ident_t *loc)
163 {
164  KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
165 
166  return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
167 }
168 
175 kmp_int32
176 __kmpc_ok_to_fork(ident_t *loc)
177 {
178 #ifndef KMP_DEBUG
179 
180  return TRUE;
181 
182 #else
183 
184  const char *semi2;
185  const char *semi3;
186  int line_no;
187 
188  if (__kmp_par_range == 0) {
189  return TRUE;
190  }
191  semi2 = loc->psource;
192  if (semi2 == NULL) {
193  return TRUE;
194  }
195  semi2 = strchr(semi2, ';');
196  if (semi2 == NULL) {
197  return TRUE;
198  }
199  semi2 = strchr(semi2 + 1, ';');
200  if (semi2 == NULL) {
201  return TRUE;
202  }
203  if (__kmp_par_range_filename[0]) {
204  const char *name = semi2 - 1;
205  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
206  name--;
207  }
208  if ((*name == '/') || (*name == ';')) {
209  name++;
210  }
211  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
212  return __kmp_par_range < 0;
213  }
214  }
215  semi3 = strchr(semi2 + 1, ';');
216  if (__kmp_par_range_routine[0]) {
217  if ((semi3 != NULL) && (semi3 > semi2)
218  && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
219  return __kmp_par_range < 0;
220  }
221  }
222  if (sscanf(semi3 + 1, "%d", &line_no) == 1) {
223  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
224  return __kmp_par_range > 0;
225  }
226  return __kmp_par_range < 0;
227  }
228  return TRUE;
229 
230 #endif /* KMP_DEBUG */
231 
232 }
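/* Editor's sketch (not part of the original file): the KMP_PAR_RANGE filtering above
 * assumes that ident_t::psource packs the source location as semicolon-separated
 * fields, roughly ";file;routine;first_line;last_line;;".  A minimal stand-alone parse
 * of the line-number field, mirroring the strchr()/sscanf() walk above (names are
 * illustrative, not from kmp.h):
 */
static int example_psource_line( const char *psource )
{
    const char *semi1 = strchr( psource, ';' );                  /* end of leading field */
    const char *semi2 = semi1 ? strchr( semi1 + 1, ';' ) : NULL; /* end of file name     */
    const char *semi3 = semi2 ? strchr( semi2 + 1, ';' ) : NULL; /* end of routine name  */
    int line_no = 0;
    if ( semi3 != NULL )
        sscanf( semi3 + 1, "%d", &line_no );                     /* first line of construct */
    return line_no;                                              /* 0 if unparsable         */
}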
233 
239 kmp_int32
240 __kmpc_in_parallel(ident_t *loc)
241 {
242  return __kmp_entry_thread() -> th.th_root -> r.r_active;
243 }
244 
254 void
255 __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
256 {
257  KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
258  global_tid, num_threads ) );
259 
260  __kmp_push_num_threads( loc, global_tid, num_threads );
261 }
262 
263 void
264 __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
265 {
266  KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
267 
268  /* the num_threads are automatically popped */
269 }
270 
271 
272 #if OMP_40_ENABLED
273 
274 void
275 __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
276 {
277  KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
278  global_tid, proc_bind ) );
279 
280  __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
281 }
282 
283 #endif /* OMP_40_ENABLED */
284 
285 
295 void
296 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
297 {
298  int gtid = __kmp_entry_gtid();
299  // perhaps saving thr_state would be enough here
300  {
301  va_list ap;
302  va_start( ap, microtask );
303 
304  __kmp_fork_call( loc, gtid, TRUE,
305  argc,
306  VOLATILE_CAST(microtask_t) microtask,
307  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
308 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
309 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
310  &ap
311 #else
312  ap
313 #endif
314  );
315  __kmp_join_call( loc, gtid );
316 
317  va_end( ap );
318  }
319 }
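/* Editor's sketch (not part of the original file): a rough picture of what a compiler
 * might emit for "#pragma omp parallel" with one shared variable, showing how the
 * outlined body matches the kmpc_micro signature and how the shared argument travels
 * through the __kmpc_fork_call() varargs.  All names here are illustrative.
 */
static void example_outlined_body( kmp_int32 *global_tid, kmp_int32 *bound_tid, int *shared_x )
{
    /* parallel region body; *global_tid / *bound_tid identify the executing thread */
    if ( *bound_tid == 0 )          /* let one thread touch the shared variable */
        (*shared_x)++;
}

static void example_parallel_region( ident_t *loc_example )
{
    int x = 0;
    /* argc == 1: one vararg (the address of the shared variable) follows the microtask */
    __kmpc_fork_call( loc_example, 1, (kmpc_micro) example_outlined_body, &x );
}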
320 
321 #if OMP_40_ENABLED
322 
332 void
333 __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
334 {
335  KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
336  global_tid, num_teams, num_threads ) );
337 
338  __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
339 }
340 
350 void
351 __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
352 {
353  int gtid = __kmp_entry_gtid();
354  kmp_info_t *this_thr = __kmp_threads[ gtid ];
355  va_list ap;
356  va_start( ap, microtask );
357 
358  // remember teams entry point and nesting level
359  this_thr->th.th_team_microtask = microtask;
360  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
361 
362  // check whether __kmpc_push_num_teams was called; if not, set the default number of teams
363  if ( this_thr->th.th_set_nth_teams == 0 ) {
364  __kmp_push_num_teams( loc, gtid, 0, 0 );
365  }
366  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
367  KMP_DEBUG_ASSERT(this_thr->th.th_set_nth_teams >= 1);
368 
369  __kmp_fork_call( loc, gtid, TRUE,
370  argc,
371  VOLATILE_CAST(microtask_t) __kmp_teams_master,
372  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
373 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
374  &ap
375 #else
376  ap
377 #endif
378  );
379  __kmp_join_call( loc, gtid );
380  this_thr->th.th_team_microtask = NULL;
381  this_thr->th.th_teams_level = 0;
382 
383  va_end( ap );
384 }
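/* Editor's sketch (not part of the original file): a plausible lowering of
 * "#pragma omp teams num_teams(4) thread_limit(8)" in terms of the two entry points
 * above -- push the clause values, then fork with the outlined teams body.
 * All names here are illustrative.
 */
static void example_teams_body( kmp_int32 *global_tid, kmp_int32 *bound_tid )
{
    /* executed by the master thread of each team */
}

static void example_teams_region( ident_t *loc_example, kmp_int32 gtid )
{
    __kmpc_push_num_teams( loc_example, gtid, 4 /* num_teams */, 8 /* thread_limit */ );
    __kmpc_fork_teams( loc_example, 0 /* argc */, (kmpc_micro) example_teams_body );
}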
385 #endif /* OMP_40_ENABLED */
386 
387 
388 //
389 // I don't think this function should ever have been exported.
390 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
391 // openmp code ever called it, but it's been exported from the RTL for so
392 // long that I'm afraid to remove the definition.
393 //
394 int
395 __kmpc_invoke_task_func( int gtid )
396 {
397  return __kmp_invoke_task_func( gtid );
398 }
399 
412 void
413 __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
414 {
415  kmp_info_t *this_thr;
416  kmp_team_t *serial_team;
417 
418  KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
419 
420  /* Skip all this code for autopar serialized loops since it results in
421  unacceptable overhead */
422  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
423  return;
424 
425  if( ! TCR_4( __kmp_init_parallel ) )
426  __kmp_parallel_initialize();
427 
428  this_thr = __kmp_threads[ global_tid ];
429  serial_team = this_thr -> th.th_serial_team;
430 
431  /* utilize the serialized team held by this thread */
432  KMP_DEBUG_ASSERT( serial_team );
433  KMP_MB();
434 
435 #if OMP_30_ENABLED
436  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
437  KMP_DEBUG_ASSERT( this_thr -> th.th_task_team == this_thr -> th.th_team -> t.t_task_team );
438  KMP_DEBUG_ASSERT( serial_team -> t.t_task_team == NULL );
439  KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
440  global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
441  this_thr -> th.th_task_team = NULL;
442  }
443 #endif // OMP_30_ENABLED
444 
445 #if OMP_40_ENABLED
446  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
447  if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
448  proc_bind = proc_bind_false;
449  }
450  else if ( proc_bind == proc_bind_default ) {
451  //
452  // No proc_bind clause was specified, so use the current value
453  // of proc-bind-var for this parallel region.
454  //
455  proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
456  }
457  //
458  // Reset for next parallel region
459  //
460  this_thr->th.th_set_proc_bind = proc_bind_default;
461 #endif /* OMP_40_ENABLED */
462 
463  if( this_thr -> th.th_team != serial_team ) {
464 #if OMP_30_ENABLED
465  // Nested level will be an index in the nested nthreads array
466  int level = this_thr->th.th_team->t.t_level;
467 #endif
468  if( serial_team -> t.t_serialized ) {
469  /* this serial team was already used
470  * TODO increase performance by making these locks more specific */
471  kmp_team_t *new_team;
472  int tid = this_thr->th.th_info.ds.ds_tid;
473 
474  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
475 
476  new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
477 #if OMP_40_ENABLED
478  proc_bind,
479 #endif
480 #if OMP_30_ENABLED
481  & this_thr->th.th_current_task->td_icvs,
482 #else
483  this_thr->th.th_team->t.t_set_nproc[tid],
484  this_thr->th.th_team->t.t_set_dynamic[tid],
485  this_thr->th.th_team->t.t_set_nested[tid],
486  this_thr->th.th_team->t.t_set_blocktime[tid],
487  this_thr->th.th_team->t.t_set_bt_intervals[tid],
488  this_thr->th.th_team->t.t_set_bt_set[tid],
489 #endif // OMP_30_ENABLED
490  0);
491  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
492  KMP_ASSERT( new_team );
493 
494  /* setup new serialized team and install it */
495  new_team -> t.t_threads[0] = this_thr;
496  new_team -> t.t_parent = this_thr -> th.th_team;
497  serial_team = new_team;
498  this_thr -> th.th_serial_team = serial_team;
499 
500  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
501  global_tid, serial_team ) );
502 
503 
504  /* TODO the above breaks the requirement that if we run out of
505  * resources, then we can still guarantee that serialized teams
506  * are ok, since we may need to allocate a new one */
507  } else {
508  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
509  global_tid, serial_team ) );
510  }
511 
512  /* we have to initialize this serial team */
513  KMP_DEBUG_ASSERT( serial_team->t.t_threads );
514  KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
515  KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
516  serial_team -> t.t_ident = loc;
517  serial_team -> t.t_serialized = 1;
518  serial_team -> t.t_nproc = 1;
519  serial_team -> t.t_parent = this_thr->th.th_team;
520 #if OMP_30_ENABLED
521  serial_team -> t.t_sched = this_thr->th.th_team->t.t_sched;
522 #endif // OMP_30_ENABLED
523  this_thr -> th.th_team = serial_team;
524  serial_team -> t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
525 
526 #if OMP_30_ENABLED
527  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
528  global_tid, this_thr->th.th_current_task ) );
529  KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
530  this_thr->th.th_current_task->td_flags.executing = 0;
531 
532  __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
533 
534  /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
535  each serialized task represented by team->t.t_serialized? */
536  copy_icvs(
537  & this_thr->th.th_current_task->td_icvs,
538  & this_thr->th.th_current_task->td_parent->td_icvs );
539 
540  // Thread value exists in the nested nthreads array for the next nested level
541  if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
542  this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
543  }
544 
545 #if OMP_40_ENABLED
546  if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
547  this_thr->th.th_current_task->td_icvs.proc_bind
548  = __kmp_nested_proc_bind.bind_types[ level + 1 ];
549  }
550 #endif /* OMP_40_ENABLED */
551 
552 #else /* pre-3.0 icv's */
553  serial_team -> t.t_set_nproc[0] = serial_team->t.t_parent->
554  t.t_set_nproc[serial_team->
555  t.t_master_tid];
556  serial_team -> t.t_set_dynamic[0] = serial_team->t.t_parent->
557  t.t_set_dynamic[serial_team->
558  t.t_master_tid];
559  serial_team -> t.t_set_nested[0] = serial_team->t.t_parent->
560  t.t_set_nested[serial_team->
561  t.t_master_tid];
562  serial_team -> t.t_set_blocktime[0] = serial_team->t.t_parent->
563  t.t_set_blocktime[serial_team->
564  t.t_master_tid];
565  serial_team -> t.t_set_bt_intervals[0] = serial_team->t.t_parent->
566  t.t_set_bt_intervals[serial_team->
567  t.t_master_tid];
568  serial_team -> t.t_set_bt_set[0] = serial_team->t.t_parent->
569  t.t_set_bt_set[serial_team->
570  t.t_master_tid];
571 #endif // OMP_30_ENABLED
572  this_thr -> th.th_info.ds.ds_tid = 0;
573 
574  /* set thread cache values */
575  this_thr -> th.th_team_nproc = 1;
576  this_thr -> th.th_team_master = this_thr;
577  this_thr -> th.th_team_serialized = 1;
578 
579 #if OMP_30_ENABLED
580  serial_team -> t.t_level = serial_team -> t.t_parent -> t.t_level + 1;
581  serial_team -> t.t_active_level = serial_team -> t.t_parent -> t.t_active_level;
582 #endif // OMP_30_ENABLED
583 
584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
585  if ( __kmp_inherit_fp_control ) {
586  __kmp_store_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
587  __kmp_store_mxcsr( &serial_team->t.t_mxcsr );
588  serial_team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
589  serial_team->t.t_fp_control_saved = TRUE;
590  } else {
591  serial_team->t.t_fp_control_saved = FALSE;
592  }
593 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
594  /* check if we need to allocate dispatch buffers stack */
595  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
596  if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
597  serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
598  __kmp_allocate( sizeof( dispatch_private_info_t ) );
599  }
600  this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
601 
602  KMP_MB();
603 
604  } else {
605  /* this serialized team is already being used,
606  * that's fine, just add another nested level */
607  KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
608  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
609  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
610  ++ serial_team -> t.t_serialized;
611  this_thr -> th.th_team_serialized = serial_team -> t.t_serialized;
612 
613 #if OMP_30_ENABLED
614  // Nested level will be an index in the nested nthreads array
615  int level = this_thr->th.th_team->t.t_level;
616  // Thread value exists in the nested nthreads array for the next nested level
617  if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
618  this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
619  }
620  serial_team -> t.t_level++;
621  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
622  global_tid, serial_team, serial_team -> t.t_level ) );
623 #else
624  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing team %p for nested serialized parallel region\n",
625  global_tid, serial_team ) );
626 #endif // OMP_30_ENABLED
627 
628  /* allocate/push dispatch buffers stack */
629  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
630  {
631  dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
632  __kmp_allocate( sizeof( dispatch_private_info_t ) );
633  disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
634  serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
635  }
636  this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
637 
638  KMP_MB();
639  }
640 
641  if ( __kmp_env_consistency_check )
642  __kmp_push_parallel( global_tid, NULL );
643 
644 // t_level is not available in 2.5 build, so check for OMP_30_ENABLED
645 #if USE_ITT_BUILD && OMP_30_ENABLED
646  // Mark the start of the "parallel" region for VTune. Only one of the frame notification schemes is used at the moment.
647  if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
648  {
649  __kmp_itt_region_forking( global_tid, 1 );
650  }
651  if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
652  {
653 #if USE_ITT_NOTIFY
654  if( this_thr->th.th_team->t.t_level == 1 ) {
655  this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
656  }
657 #endif
658  }
659 #endif /* USE_ITT_BUILD */
660 
661 }
662 
670 void
671 __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
672 {
673  kmp_internal_control_t *top;
674  kmp_info_t *this_thr;
675  kmp_team_t *serial_team;
676 
677  KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
678 
679  /* skip all this code for autopar serialized loops since it results in
680  unacceptable overhead */
681  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
682  return;
683 
684  // Not autopar code
685  if( ! TCR_4( __kmp_init_parallel ) )
686  __kmp_parallel_initialize();
687 
688  this_thr = __kmp_threads[ global_tid ];
689  serial_team = this_thr->th.th_serial_team;
690 
691  KMP_MB();
692  KMP_DEBUG_ASSERT( serial_team );
693  KMP_ASSERT( serial_team -> t.t_serialized );
694  KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
695  KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
696  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
697  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
698 
699  /* If necessary, pop the internal control stack values and replace the team values */
700  top = serial_team -> t.t_control_stack_top;
701  if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
702 #if OMP_30_ENABLED
703  copy_icvs(
704  &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs,
705  top );
706 #else
707  serial_team -> t.t_set_nproc[0] = top -> nproc;
708  serial_team -> t.t_set_dynamic[0] = top -> dynamic;
709  serial_team -> t.t_set_nested[0] = top -> nested;
710  serial_team -> t.t_set_blocktime[0] = top -> blocktime;
711  serial_team -> t.t_set_bt_intervals[0] = top -> bt_intervals;
712  serial_team -> t.t_set_bt_set[0] = top -> bt_set;
713 #endif // OMP_30_ENABLED
714  serial_team -> t.t_control_stack_top = top -> next;
715  __kmp_free(top);
716  }
717 
718 #if OMP_30_ENABLED
719  //if( serial_team -> t.t_serialized > 1 )
720  serial_team -> t.t_level--;
721 #endif // OMP_30_ENABLED
722 
723  /* pop dispatch buffers stack */
724  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
725  {
726  dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
727  serial_team->t.t_dispatch->th_disp_buffer =
728  serial_team->t.t_dispatch->th_disp_buffer->next;
729  __kmp_free( disp_buffer );
730  }
731 
732  -- serial_team -> t.t_serialized;
733  if ( serial_team -> t.t_serialized == 0 ) {
734 
735  /* return to the parallel section */
736 
737 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
738  if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
739  __kmp_clear_x87_fpu_status_word();
740  __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
741  __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
742  }
743 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
744 
745  this_thr -> th.th_team = serial_team -> t.t_parent;
746  this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
747 
748  /* restore values cached in the thread */
749  this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
750  this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
751  this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
752 
753  /* TODO the below shouldn't need to be adjusted for serialized teams */
754  this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
755  t.t_dispatch[ serial_team -> t.t_master_tid ];
756 
757 #if OMP_30_ENABLED
758  __kmp_pop_current_task_from_thread( this_thr );
759 
760  KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
761  this_thr -> th.th_current_task -> td_flags.executing = 1;
762 
763  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
764  //
765  // Copy the task team from the new child / old parent team
766  // to the thread. If non-NULL, copy the state flag also.
767  //
768  if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) {
769  this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state;
770  }
771  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
772  global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
773  }
774 #endif // OMP_30_ENABLED
775 
776  }
777  else {
778 
779 #if OMP_30_ENABLED
780  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
781  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
782  global_tid, serial_team, serial_team -> t.t_serialized ) );
783  }
784 #endif // OMP_30_ENABLED
785 
786  }
787 
788 // t_level is not available in 2.5 build, so check for OMP_30_ENABLED
789 #if USE_ITT_BUILD && OMP_30_ENABLED
790  // Mark the end of the "parallel" region for VTune. Only one of the frame notification schemes is used at the moment.
791  if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
792  {
793  this_thr->th.th_ident = loc;
794  __kmp_itt_region_joined( global_tid, 1 );
795  }
796  if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
797  if( this_thr->th.th_team->t.t_level == 0 ) {
798  __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, __itt_timestamp_none, 0, loc );
799  }
800  }
801 #endif /* USE_ITT_BUILD */
802 
803  if ( __kmp_env_consistency_check )
804  __kmp_pop_parallel( global_tid, NULL );
805 }
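/* Editor's sketch (not part of the original file): one plausible use of the pair above
 * is a parallel region whose "if" clause evaluates to false -- the compiler can invoke
 * the outlined body directly on the encountering thread between the serialized
 * begin/end calls instead of going through __kmpc_fork_call().  All names here are
 * illustrative.
 */
static void example_if_body( kmp_int32 *global_tid, kmp_int32 *bound_tid, int *shared_x )
{
    if ( *bound_tid == 0 )
        (*shared_x)++;
}

static void example_parallel_if( ident_t *loc_example, int cond, int *shared_x )
{
    kmp_int32 gtid = __kmpc_global_thread_num( loc_example );
    if ( cond ) {
        __kmpc_fork_call( loc_example, 1, (kmpc_micro) example_if_body, shared_x );
    } else {
        kmp_int32 zero = 0;
        __kmpc_serialized_parallel( loc_example, gtid );
        example_if_body( &gtid, &zero, shared_x );           /* run the body on this thread */
        __kmpc_end_serialized_parallel( loc_example, gtid );
    }
}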
806 
819 void
820 __kmpc_flush(ident_t *loc, ...)
821 {
822  KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
823 
824  /* need an explicit __mf() here since the library uses volatile instead */
825  KMP_MB(); /* Flush all pending memory write invalidates. */
826 
827  // This is not an OMP 3.0 feature.
828  // This macro is used here just not to let the change go to 10.1.
829  // This change will go to the mainline first.
830  #if OMP_30_ENABLED
831  #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
832  #if KMP_MIC
833  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
834  // We shouldn't need it, though, since the ABI rules require that
835  // * If the compiler generates NGO stores it also generates the fence
836  // * If users hand-code NGO stores they should insert the fence
837  // therefore no incomplete unordered stores should be visible.
838  #else
839  // C74404
840  // This is to address non-temporal store instructions (sfence needed).
841  // The clflush instruction is also addressed (mfence needed).
842  // Probably the non-temporal load instruction movntdqa should also be addressed.
843  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
844  if ( ! __kmp_cpuinfo.initialized ) {
845  __kmp_query_cpuid( & __kmp_cpuinfo );
846  }; // if
847  if ( ! __kmp_cpuinfo.sse2 ) {
848  // CPU cannot execute SSE2 instructions.
849  } else {
850  #if KMP_COMPILER_ICC
851  _mm_mfence();
852  #else
853  __sync_synchronize();
854  #endif // KMP_COMPILER_ICC
855  }; // if
856  #endif // KMP_MIC
857  #elif KMP_ARCH_ARM
858  // Nothing yet
859  #else
860  #error Unknown or unsupported architecture
861  #endif
862  #endif // OMP_30_ENABLED
863 
864 }
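/* Editor's sketch (not part of the original file): "#pragma omp flush" is expected to
 * lower to a single call of the entry point above; the real work is the architecture-
 * specific full memory fence selected in the code.  A user-level stand-in for that
 * fence (illustrative only, not the runtime's internal path) could be:
 */
static void example_flush( ident_t *loc_example )
{
    __kmpc_flush( loc_example );   /* what the compiler emits for the pragma */
#if defined(__GNUC__)
    __sync_synchronize();          /* a comparable stand-alone full memory fence */
#endif
}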
865 
866 /* -------------------------------------------------------------------------- */
867 
868 /* -------------------------------------------------------------------------- */
869 
877 void
878 __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
879 {
880  int explicit_barrier_flag;
881  KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
882 
883  if (! TCR_4(__kmp_init_parallel))
884  __kmp_parallel_initialize();
885 
886  if ( __kmp_env_consistency_check ) {
887  if ( loc == 0 ) {
888  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
889  }; // if
890 
891  __kmp_check_barrier( global_tid, ct_barrier, loc );
892  }
893 
894  __kmp_threads[ global_tid ]->th.th_ident = loc;
895  // TODO: explicit barrier_wait_id:
896  // this function is called when the 'barrier' directive is present or at the
897  // implicit barrier at the end of a worksharing construct.
898  // 1) better to add a per-thread barrier counter to a thread data structure
899  // 2) set to 0 when a new team is created
900  // 3) no sync is required
901 
902  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
903 }
904 
905 /* The BARRIER for a MASTER section is always explicit */
912 kmp_int32
913 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
914 {
915  int status = 0;
916 
917  KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
918 
919  if( ! TCR_4( __kmp_init_parallel ) )
920  __kmp_parallel_initialize();
921 
922  if( KMP_MASTER_GTID( global_tid ))
923  status = 1;
924 
925  if ( __kmp_env_consistency_check ) {
926  if (status)
927  __kmp_push_sync( global_tid, ct_master, loc, NULL );
928  else
929  __kmp_check_sync( global_tid, ct_master, loc, NULL );
930  }
931 
932  return status;
933 }
934 
943 void
944 __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
945 {
946  KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
947 
948  KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
949 
950  if ( __kmp_env_consistency_check ) {
951  if( global_tid < 0 )
952  KMP_WARNING( ThreadIdentInvalid );
953 
954  if( KMP_MASTER_GTID( global_tid ))
955  __kmp_pop_sync( global_tid, ct_master, loc );
956  }
957 }
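/* Editor's sketch (not part of the original file): the expected compiler-generated
 * shape for "#pragma omp master" -- only the thread for which __kmpc_master() returns
 * 1 executes the body and closes it with __kmpc_end_master().  Names are illustrative.
 */
static void example_master_construct( ident_t *loc_example, kmp_int32 gtid )
{
    if ( __kmpc_master( loc_example, gtid ) ) {
        /* ... master-only body ... */
        __kmpc_end_master( loc_example, gtid );
    }
}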
958 
966 void
967 __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
968 {
969  int cid = 0;
970  kmp_info_t *th;
971  KMP_DEBUG_ASSERT( __kmp_init_serial );
972 
973  KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
974 
975  if (! TCR_4(__kmp_init_parallel))
976  __kmp_parallel_initialize();
977 
978 #if USE_ITT_BUILD
979  __kmp_itt_ordered_prep( gtid );
980  // TODO: ordered_wait_id
981 #endif /* USE_ITT_BUILD */
982 
983  th = __kmp_threads[ gtid ];
984 
985  if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
986  (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
987  else
988  __kmp_parallel_deo( & gtid, & cid, loc );
989 
990 #if USE_ITT_BUILD
991  __kmp_itt_ordered_start( gtid );
992 #endif /* USE_ITT_BUILD */
993 }
994 
1002 void
1003 __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
1004 {
1005  int cid = 0;
1006  kmp_info_t *th;
1007 
1008  KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
1009 
1010 #if USE_ITT_BUILD
1011  __kmp_itt_ordered_end( gtid );
1012  // TODO: ordered_wait_id
1013 #endif /* USE_ITT_BUILD */
1014 
1015  th = __kmp_threads[ gtid ];
1016 
1017  if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
1018  (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
1019  else
1020  __kmp_parallel_dxo( & gtid, & cid, loc );
1021 }
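/* Editor's sketch (not part of the original file): inside an ordered loop the body of
 * "#pragma omp ordered" is simply bracketed by the two calls above, e.g.:
 */
static void example_ordered_body( ident_t *loc_example, kmp_int32 gtid )
{
    __kmpc_ordered( loc_example, gtid );
    /* ... code that must run in iteration order ... */
    __kmpc_end_ordered( loc_example, gtid );
}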
1022 
1023 inline void
1024 __kmp_static_yield( int arg ) { // AC: needed in macro __kmp_acquire_user_lock_with_checks
1025  __kmp_yield( arg );
1026 }
1027 
1028 static kmp_user_lock_p
1029 __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1030 {
1031  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1032 
1033  //
1034  // Because of the double-check, the following load
1035  // doesn't need to be volatile.
1036  //
1037  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1038 
1039  if ( lck == NULL ) {
1040  void * idx;
1041 
1042  // Allocate & initialize the lock.
1043  // Remember allocated locks in table in order to free them in __kmp_cleanup()
1044  lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1045  __kmp_init_user_lock_with_checks( lck );
1046  __kmp_set_user_lock_location( lck, loc );
1047 #if USE_ITT_BUILD
1048  __kmp_itt_critical_creating( lck );
1049  // __kmp_itt_critical_creating() should be called *before* the first usage of the underlying
1050  // lock. It is the only place where we can guarantee it. There is a chance the lock will be
1051  // destroyed with no usage, but it is not a problem, because this is not a real event seen
1052  // by the user but rather setting a name for the object (lock). See more details in kmp_itt.h.
1053 #endif /* USE_ITT_BUILD */
1054 
1055  //
1056  // Use a cmpxchg instruction to slam the start of the critical
1057  // section with the lock pointer. If another thread beat us
1058  // to it, deallocate the lock, and use the lock that the other
1059  // thread allocated.
1060  //
1061  int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1062 
1063  if ( status == 0 ) {
1064  // Deallocate the lock and reload the value.
1065 #if USE_ITT_BUILD
1066  __kmp_itt_critical_destroyed( lck );
1067  // Let ITT know the lock is destroyed and the same memory location may be reused for
1068  // another purpose.
1069 #endif /* USE_ITT_BUILD */
1070  __kmp_destroy_user_lock_with_checks( lck );
1071  __kmp_user_lock_free( &idx, gtid, lck );
1072  lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1073  KMP_DEBUG_ASSERT( lck != NULL );
1074  }
1075  }
1076  return lck;
1077 }
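/* Editor's sketch (not part of the original file): the allocate-then-compare-and-swap
 * pattern above, reduced to a stand-alone C11 shape (assumes <stdatomic.h> and
 * <stdlib.h>).  Whichever thread loses the race frees its own allocation and adopts
 * the winner's pointer, so exactly one object ever becomes visible through the slot.
 */
static void *example_lazy_init( _Atomic(void *) *slot, size_t size )
{
    void *obj = atomic_load_explicit( slot, memory_order_acquire );
    if ( obj == NULL ) {
        void *mine = calloc( 1, size );        /* allocate and initialize a candidate */
        void *expected = NULL;
        if ( atomic_compare_exchange_strong( slot, &expected, mine ) ) {
            obj = mine;                        /* we published our object             */
        } else {
            free( mine );                      /* lost the race: drop our copy        */
            obj = expected;                    /* and use the winner's object         */
        }
    }
    return obj;
}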
1078 
1089 void
1090 __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1091 
1092  kmp_user_lock_p lck;
1093 
1094  KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1095 
1096  //TODO: add THR_OVHD_STATE
1097 
1098  KMP_CHECK_USER_LOCK_INIT();
1099 
1100  if ( ( __kmp_user_lock_kind == lk_tas )
1101  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1102  lck = (kmp_user_lock_p)crit;
1103  }
1104 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1105  else if ( ( __kmp_user_lock_kind == lk_futex )
1106  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1107  lck = (kmp_user_lock_p)crit;
1108  }
1109 #endif
1110  else { // ticket, queuing or drdpa
1111  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1112  }
1113 
1114  if ( __kmp_env_consistency_check )
1115  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1116 
1117  /* since the critical directive binds to all threads, not just the
1118  * current team, we have to check this even if we are in a
1119  * serialized team */
1120  /* also, even if we are the uber thread, we still have to take the lock,
1121  * as we may have to contend with sibling threads */
1122 
1123 #if USE_ITT_BUILD
1124  __kmp_itt_critical_acquiring( lck );
1125 #endif /* USE_ITT_BUILD */
1126  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1127 
1128  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1129 
1130 #if USE_ITT_BUILD
1131  __kmp_itt_critical_acquired( lck );
1132 #endif /* USE_ITT_BUILD */
1133 
1134  KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1135 } // __kmpc_critical
1136 
1146 void
1147 __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1148 {
1149  kmp_user_lock_p lck;
1150 
1151  KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1152 
1153  if ( ( __kmp_user_lock_kind == lk_tas )
1154  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1155  lck = (kmp_user_lock_p)crit;
1156  }
1157 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1158  else if ( ( __kmp_user_lock_kind == lk_futex )
1159  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1160  lck = (kmp_user_lock_p)crit;
1161  }
1162 #endif
1163  else { // ticket, queuing or drdpa
1164  lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1165  }
1166 
1167  KMP_ASSERT(lck != NULL);
1168 
1169  if ( __kmp_env_consistency_check )
1170  __kmp_pop_sync( global_tid, ct_critical, loc );
1171 
1172 #if USE_ITT_BUILD
1173  __kmp_itt_critical_releasing( lck );
1174 #endif /* USE_ITT_BUILD */
1175  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1176 
1177  __kmp_release_user_lock_with_checks( lck, global_tid );
1178 
1179  KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1180 }
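/* Editor's sketch (not part of the original file): for each named critical section the
 * compiler is expected to emit one zero-initialized kmp_critical_name object (a small
 * kmp_int32 array, see kmp.h) and bracket the protected code with the two calls above.
 * Names are illustrative.
 */
static kmp_critical_name example_crit_name;     /* static, zero-filled storage */

static void example_critical_construct( ident_t *loc_example, kmp_int32 gtid, int *counter )
{
    __kmpc_critical( loc_example, gtid, &example_crit_name );
    (*counter)++;                               /* mutually exclusive body */
    __kmpc_end_critical( loc_example, gtid, &example_crit_name );
}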
1181 
1190 kmp_int32
1191 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1192 {
1193  int status;
1194 
1195  KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1196 
1197  if (! TCR_4(__kmp_init_parallel))
1198  __kmp_parallel_initialize();
1199 
1200  if ( __kmp_env_consistency_check )
1201  __kmp_check_barrier( global_tid, ct_barrier, loc );
1202 
1203  status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1204 
1205  return (status != 0) ? 0 : 1;
1206 }
1207 
1217 void
1218 __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1219 {
1220  KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1221 
1222  __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1223 }
1224 
1235 kmp_int32
1236 __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1237 {
1238  kmp_int32 ret;
1239 
1240  KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1241 
1242  if (! TCR_4(__kmp_init_parallel))
1243  __kmp_parallel_initialize();
1244 
1245  if ( __kmp_env_consistency_check ) {
1246  if ( loc == 0 ) {
1247  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1248  }
1249  __kmp_check_barrier( global_tid, ct_barrier, loc );
1250  }
1251 
1252  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1253 
1254  ret = __kmpc_master (loc, global_tid);
1255 
1256  if ( __kmp_env_consistency_check ) {
1257  /* there's no __kmpc_end_master called; so the (stats) */
1258  /* actions of __kmpc_end_master are done here */
1259 
1260  if ( global_tid < 0 ) {
1261  KMP_WARNING( ThreadIdentInvalid );
1262  }
1263  if (ret) {
1264  /* only one thread should do the pop since only */
1265  /* one did the push (see __kmpc_master()) */
1266 
1267  __kmp_pop_sync( global_tid, ct_master, loc );
1268  }
1269  }
1270 
1271  return (ret);
1272 }
1273 
1274 /* The BARRIER for a SINGLE process section is always explicit */
1286 kmp_int32
1287 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
1288 {
1289  kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
1290  return rc;
1291 }
1292 
1302 void
1303 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1304 {
1305  __kmp_exit_single( global_tid );
1306 }
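/* Editor's sketch (not part of the original file): the expected shape for
 * "#pragma omp single" -- the one thread for which __kmpc_single() returns 1 runs the
 * body and calls __kmpc_end_single(); without a nowait clause the construct is closed
 * with an explicit barrier.  Names are illustrative.
 */
static void example_single_construct( ident_t *loc_example, kmp_int32 gtid )
{
    if ( __kmpc_single( loc_example, gtid ) ) {
        /* ... single-thread body ... */
        __kmpc_end_single( loc_example, gtid );
    }
    __kmpc_barrier( loc_example, gtid );        /* omitted when the construct has nowait */
}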
1307 
1315 void
1316 __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1317 {
1318  KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1319 
1320  if ( __kmp_env_consistency_check )
1321  __kmp_pop_workshare( global_tid, ct_pdo, loc );
1322 }
1323 
1324 /*
1325  * User routines which take C-style arguments (call by value)
1326  * different from the Fortran equivalent routines
1327  */
1328 
1329 void
1330 ompc_set_num_threads( int arg )
1331 {
1332 // !!!!! TODO: check the per-task binding
1333  __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1334 }
1335 
1336 void
1337 ompc_set_dynamic( int flag )
1338 {
1339  kmp_info_t *thread;
1340 
1341  /* For the thread-private implementation of the internal controls */
1342  thread = __kmp_entry_thread();
1343 
1344  __kmp_save_internal_controls( thread );
1345 
1346  set__dynamic( thread, flag ? TRUE : FALSE );
1347 }
1348 
1349 void
1350 ompc_set_nested( int flag )
1351 {
1352  kmp_info_t *thread;
1353 
1354  /* For the thread-private internal controls implementation */
1355  thread = __kmp_entry_thread();
1356 
1357  __kmp_save_internal_controls( thread );
1358 
1359  set__nested( thread, flag ? TRUE : FALSE );
1360 }
1361 
1362 #if OMP_30_ENABLED
1363 
1364 void
1365 ompc_set_max_active_levels( int max_active_levels )
1366 {
1367  /* TO DO */
1368  /* we want per-task implementation of this internal control */
1369 
1370  /* For the per-thread internal controls implementation */
1371  __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1372 }
1373 
1374 void
1375 ompc_set_schedule( omp_sched_t kind, int modifier )
1376 {
1377 // !!!!! TODO: check the per-task binding
1378  __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1379 }
1380 
1381 int
1382 ompc_get_ancestor_thread_num( int level )
1383 {
1384  return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1385 }
1386 
1387 int
1388 ompc_get_team_size( int level )
1389 {
1390  return __kmp_get_team_size( __kmp_entry_gtid(), level );
1391 }
1392 
1393 #endif // OMP_30_ENABLED
1394 
1395 void
1396 kmpc_set_stacksize( int arg )
1397 {
1398  // __kmp_aux_set_stacksize initializes the library if needed
1399  __kmp_aux_set_stacksize( arg );
1400 }
1401 
1402 void
1403 kmpc_set_stacksize_s( size_t arg )
1404 {
1405  // __kmp_aux_set_stacksize initializes the library if needed
1406  __kmp_aux_set_stacksize( arg );
1407 }
1408 
1409 void
1410 kmpc_set_blocktime( int arg )
1411 {
1412  int gtid, tid;
1413  kmp_info_t *thread;
1414 
1415  gtid = __kmp_entry_gtid();
1416  tid = __kmp_tid_from_gtid(gtid);
1417  thread = __kmp_thread_from_gtid(gtid);
1418 
1419  __kmp_aux_set_blocktime( arg, thread, tid );
1420 }
1421 
1422 void
1423 kmpc_set_library( int arg )
1424 {
1425  // __kmp_user_set_library initializes the library if needed
1426  __kmp_user_set_library( (enum library_type)arg );
1427 }
1428 
1429 void
1430 kmpc_set_defaults( char const * str )
1431 {
1432  // __kmp_aux_set_defaults initializes the library if needed
1433  __kmp_aux_set_defaults( str, strlen( str ) );
1434 }
1435 
1436 #ifdef OMP_30_ENABLED
1437 
1438 int
1439 kmpc_set_affinity_mask_proc( int proc, void **mask )
1440 {
1441 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
1442  return -1;
1443 #else
1444  if ( ! TCR_4(__kmp_init_middle) ) {
1445  __kmp_middle_initialize();
1446  }
1447  return __kmp_aux_set_affinity_mask_proc( proc, mask );
1448 #endif
1449 }
1450 
1451 int
1452 kmpc_unset_affinity_mask_proc( int proc, void **mask )
1453 {
1454 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
1455  return -1;
1456 #else
1457  if ( ! TCR_4(__kmp_init_middle) ) {
1458  __kmp_middle_initialize();
1459  }
1460  return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1461 #endif
1462 }
1463 
1464 int
1465 kmpc_get_affinity_mask_proc( int proc, void **mask )
1466 {
1467 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
1468  return -1;
1469 #else
1470  if ( ! TCR_4(__kmp_init_middle) ) {
1471  __kmp_middle_initialize();
1472  }
1473  return __kmp_aux_get_affinity_mask_proc( proc, mask );
1474 #endif
1475 }
1476 
1477 #endif /* OMP_30_ENABLED */
1478 
1479 /* -------------------------------------------------------------------------- */
1520 void
1521 __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1522 {
1523  void **data_ptr;
1524 
1525  KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1526 
1527  KMP_MB();
1528 
1529  data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1530 
1531  if ( __kmp_env_consistency_check ) {
1532  if ( loc == 0 ) {
1533  KMP_WARNING( ConstructIdentInvalid );
1534  }
1535  }
1536 
1537  /* ToDo: Optimize the following two barriers into some kind of split barrier */
1538 
1539  if (didit) *data_ptr = cpy_data;
1540 
1541  /* This barrier is not a barrier region boundary */
1542  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1543 
1544  if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1545 
1546  /* Consider next barrier the user-visible barrier for barrier region boundaries */
1547  /* Nesting checks are already handled by the single construct checks */
1548 
1549  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1550 }
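/* Editor's sketch (not part of the original file): a plausible lowering of
 * "#pragma omp single copyprivate(x)".  The thread that executed the single body
 * passes didit=1 and publishes &x; every other thread passes didit=0 and receives the
 * value through the copy callback.  Names are illustrative.
 */
static void example_copy_int( void *dst, void *src )
{
    *(int *)dst = *(int *)src;
}

static void example_single_copyprivate( ident_t *loc_example, kmp_int32 gtid, int *x )
{
    kmp_int32 didit = 0;
    if ( __kmpc_single( loc_example, gtid ) ) {
        *x = 42;                                /* single-thread body producing x */
        didit = 1;
        __kmpc_end_single( loc_example, gtid );
    }
    __kmpc_copyprivate( loc_example, gtid, sizeof( int ), x, example_copy_int, didit );
}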
1551 
1552 /* -------------------------------------------------------------------------- */
1553 
1554 #define INIT_LOCK __kmp_init_user_lock_with_checks
1555 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1556 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1557 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1558 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1559 #define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1560 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
1561 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1562 #define TEST_LOCK __kmp_test_user_lock_with_checks
1563 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1564 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1565 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1566 
1567 
1568 /*
1569  * TODO: Make check abort messages use location info & pass it
1570  * into with_checks routines
1571  */
1572 
1573 /* initialize the lock */
1574 void
1575 __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1576  static char const * const func = "omp_init_lock";
1577  kmp_user_lock_p lck;
1578  KMP_DEBUG_ASSERT( __kmp_init_serial );
1579 
1580  if ( __kmp_env_consistency_check ) {
1581  if ( user_lock == NULL ) {
1582  KMP_FATAL( LockIsUninitialized, func );
1583  }
1584  }
1585 
1586  KMP_CHECK_USER_LOCK_INIT();
1587 
1588  if ( ( __kmp_user_lock_kind == lk_tas )
1589  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1590  lck = (kmp_user_lock_p)user_lock;
1591  }
1592 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1593  else if ( ( __kmp_user_lock_kind == lk_futex )
1594  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1595  lck = (kmp_user_lock_p)user_lock;
1596  }
1597 #endif
1598  else {
1599  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1600  }
1601  INIT_LOCK( lck );
1602  __kmp_set_user_lock_location( lck, loc );
1603 
1604 #if USE_ITT_BUILD
1605  __kmp_itt_lock_creating( lck );
1606 #endif /* USE_ITT_BUILD */
1607 } // __kmpc_init_lock
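/* Editor's sketch (not part of the original file): these lock entry points take the
 * user's lock storage by address (user_lock); whether the lock fits inline in that
 * storage (the TAS / futex cases above) or is kept in a runtime-managed table is
 * decided here and stays invisible to the caller.  A minimal round trip, with
 * illustrative names and assuming omp_lock_t storage from omp.h:
 */
static void example_lock_roundtrip( ident_t *loc_example, kmp_int32 gtid )
{
    omp_lock_t lockvar;
    __kmpc_init_lock( loc_example, gtid, (void **)&lockvar );
    __kmpc_set_lock( loc_example, gtid, (void **)&lockvar );
    /* ... protected work ... */
    __kmpc_unset_lock( loc_example, gtid, (void **)&lockvar );
    __kmpc_destroy_lock( loc_example, gtid, (void **)&lockvar );
}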
1608 
1609 /* initialize the lock */
1610 void
1611 __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1612  static char const * const func = "omp_init_nest_lock";
1613  kmp_user_lock_p lck;
1614  KMP_DEBUG_ASSERT( __kmp_init_serial );
1615 
1616  if ( __kmp_env_consistency_check ) {
1617  if ( user_lock == NULL ) {
1618  KMP_FATAL( LockIsUninitialized, func );
1619  }
1620  }
1621 
1622  KMP_CHECK_USER_LOCK_INIT();
1623 
1624  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1625  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1626  lck = (kmp_user_lock_p)user_lock;
1627  }
1628 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1629  else if ( ( __kmp_user_lock_kind == lk_futex )
1630  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1631  <= OMP_NEST_LOCK_T_SIZE ) ) {
1632  lck = (kmp_user_lock_p)user_lock;
1633  }
1634 #endif
1635  else {
1636  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1637  }
1638 
1639  INIT_NESTED_LOCK( lck );
1640  __kmp_set_user_lock_location( lck, loc );
1641 
1642 #if USE_ITT_BUILD
1643  __kmp_itt_lock_creating( lck );
1644 #endif /* USE_ITT_BUILD */
1645 } // __kmpc_init_nest_lock
1646 
1647 void
1648 __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1649 
1650  kmp_user_lock_p lck;
1651 
1652  if ( ( __kmp_user_lock_kind == lk_tas )
1653  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1654  lck = (kmp_user_lock_p)user_lock;
1655  }
1656 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1657  else if ( ( __kmp_user_lock_kind == lk_futex )
1658  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1659  lck = (kmp_user_lock_p)user_lock;
1660  }
1661 #endif
1662  else {
1663  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1664  }
1665 
1666 #if USE_ITT_BUILD
1667  __kmp_itt_lock_destroyed( lck );
1668 #endif /* USE_ITT_BUILD */
1669  DESTROY_LOCK( lck );
1670 
1671  if ( ( __kmp_user_lock_kind == lk_tas )
1672  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1673  ;
1674  }
1675 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1676  else if ( ( __kmp_user_lock_kind == lk_futex )
1677  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1678  ;
1679  }
1680 #endif
1681  else {
1682  __kmp_user_lock_free( user_lock, gtid, lck );
1683  }
1684 } // __kmpc_destroy_lock
1685 
1686 /* destroy the lock */
1687 void
1688 __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1689 
1690  kmp_user_lock_p lck;
1691 
1692  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1693  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1694  lck = (kmp_user_lock_p)user_lock;
1695  }
1696 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1697  else if ( ( __kmp_user_lock_kind == lk_futex )
1698  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1699  <= OMP_NEST_LOCK_T_SIZE ) ) {
1700  lck = (kmp_user_lock_p)user_lock;
1701  }
1702 #endif
1703  else {
1704  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1705  }
1706 
1707 #if USE_ITT_BUILD
1708  __kmp_itt_lock_destroyed( lck );
1709 #endif /* USE_ITT_BUILD */
1710 
1711  DESTROY_NESTED_LOCK( lck );
1712 
1713  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1714  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1715  ;
1716  }
1717 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1718  else if ( ( __kmp_user_lock_kind == lk_futex )
1719  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1720  <= OMP_NEST_LOCK_T_SIZE ) ) {
1721  ;
1722  }
1723 #endif
1724  else {
1725  __kmp_user_lock_free( user_lock, gtid, lck );
1726  }
1727 } // __kmpc_destroy_nest_lock
1728 
1729 void
1730 __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1731  kmp_user_lock_p lck;
1732 
1733  if ( ( __kmp_user_lock_kind == lk_tas )
1734  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1735  lck = (kmp_user_lock_p)user_lock;
1736  }
1737 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1738  else if ( ( __kmp_user_lock_kind == lk_futex )
1739  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1740  lck = (kmp_user_lock_p)user_lock;
1741  }
1742 #endif
1743  else {
1744  lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1745  }
1746 
1747 #if USE_ITT_BUILD
1748  __kmp_itt_lock_acquiring( lck );
1749 #endif /* USE_ITT_BUILD */
1750 
1751  ACQUIRE_LOCK( lck, gtid );
1752 
1753 #if USE_ITT_BUILD
1754  __kmp_itt_lock_acquired( lck );
1755 #endif /* USE_ITT_BUILD */
1756 }
1757 
1758 
1759 void
1760 __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1761  kmp_user_lock_p lck;
1762 
1763  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1764  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1765  lck = (kmp_user_lock_p)user_lock;
1766  }
1767 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1768  else if ( ( __kmp_user_lock_kind == lk_futex )
1769  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1770  <= OMP_NEST_LOCK_T_SIZE ) ) {
1771  lck = (kmp_user_lock_p)user_lock;
1772  }
1773 #endif
1774  else {
1775  lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
1776  }
1777 
1778 #if USE_ITT_BUILD
1779  __kmp_itt_lock_acquiring( lck );
1780 #endif /* USE_ITT_BUILD */
1781 
1782  ACQUIRE_NESTED_LOCK( lck, gtid );
1783 
1784 #if USE_ITT_BUILD
1785  __kmp_itt_lock_acquired( lck );
1786 #endif /* USE_ITT_BUILD */
1787 }
1788 
1789 void
1790 __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1791 {
1792  kmp_user_lock_p lck;
1793 
1794  /* Can't use serial interval since not block structured */
1795  /* release the lock */
1796 
1797  if ( ( __kmp_user_lock_kind == lk_tas )
1798  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1799 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1800  // "fast" path implemented to fix customer performance issue
1801 #if USE_ITT_BUILD
1802  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1803 #endif /* USE_ITT_BUILD */
1804  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
1805  KMP_MB();
1806  return;
1807 #else
1808  lck = (kmp_user_lock_p)user_lock;
1809 #endif
1810  }
1811 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1812  else if ( ( __kmp_user_lock_kind == lk_futex )
1813  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1814  lck = (kmp_user_lock_p)user_lock;
1815  }
1816 #endif
1817  else {
1818  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
1819  }
1820 
1821 #if USE_ITT_BUILD
1822  __kmp_itt_lock_releasing( lck );
1823 #endif /* USE_ITT_BUILD */
1824 
1825  RELEASE_LOCK( lck, gtid );
1826 }
1827 
1828 /* release the lock */
1829 void
1830 __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1831 {
1832  kmp_user_lock_p lck;
1833 
1834  /* Can't use serial interval since not block structured */
1835 
1836  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1837  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1838 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1839  // "fast" path implemented to fix customer performance issue
1840  kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
1841 #if USE_ITT_BUILD
1842  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1843 #endif /* USE_ITT_BUILD */
1844  if ( --(tl->lk.depth_locked) == 0 ) {
1845  TCW_4(tl->lk.poll, 0);
1846  }
1847  KMP_MB();
1848  return;
1849 #else
1850  lck = (kmp_user_lock_p)user_lock;
1851 #endif
1852  }
1853 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1854  else if ( ( __kmp_user_lock_kind == lk_futex )
1855  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1856  <= OMP_NEST_LOCK_T_SIZE ) ) {
1857  lck = (kmp_user_lock_p)user_lock;
1858  }
1859 #endif
1860  else {
1861  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
1862  }
1863 
1864 #if USE_ITT_BUILD
1865  __kmp_itt_lock_releasing( lck );
1866 #endif /* USE_ITT_BUILD */
1867 
1868  RELEASE_NESTED_LOCK( lck, gtid );
1869 }
1870 
1871 /* try to acquire the lock */
1872 int
1873 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1874 {
1875  kmp_user_lock_p lck;
1876  int rc;
1877 
1878  if ( ( __kmp_user_lock_kind == lk_tas )
1879  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1880  lck = (kmp_user_lock_p)user_lock;
1881  }
1882 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1883  else if ( ( __kmp_user_lock_kind == lk_futex )
1884  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1885  lck = (kmp_user_lock_p)user_lock;
1886  }
1887 #endif
1888  else {
1889  lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
1890  }
1891 
1892 #if USE_ITT_BUILD
1893  __kmp_itt_lock_acquiring( lck );
1894 #endif /* USE_ITT_BUILD */
1895 
1896  rc = TEST_LOCK( lck, gtid );
1897 #if USE_ITT_BUILD
1898  if ( rc ) {
1899  __kmp_itt_lock_acquired( lck );
1900  } else {
1901  __kmp_itt_lock_cancelled( lck );
1902  }
1903 #endif /* USE_ITT_BUILD */
1904  return ( rc ? FTN_TRUE : FTN_FALSE );
1905 
1906  /* Can't use serial interval since not block structured */
1907 }
1908 
1909 /* try to acquire the lock */
1910 int
1911 __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1912 {
1913  kmp_user_lock_p lck;
1914  int rc;
1915 
1916  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1917  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1918  lck = (kmp_user_lock_p)user_lock;
1919  }
1920 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
1921  else if ( ( __kmp_user_lock_kind == lk_futex )
1922  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1923  <= OMP_NEST_LOCK_T_SIZE ) ) {
1924  lck = (kmp_user_lock_p)user_lock;
1925  }
1926 #endif
1927  else {
1928  lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
1929  }
1930 
1931 #if USE_ITT_BUILD
1932  __kmp_itt_lock_acquiring( lck );
1933 #endif /* USE_ITT_BUILD */
1934 
1935  rc = TEST_NESTED_LOCK( lck, gtid );
1936 #if USE_ITT_BUILD
1937  if ( rc ) {
1938  __kmp_itt_lock_acquired( lck );
1939  } else {
1940  __kmp_itt_lock_cancelled( lck );
1941  }
1942 #endif /* USE_ITT_BUILD */
1943  return rc;
1944 
1945  /* Can't use serial interval since not block structured */
1946 }
1947 
1948 
1949 /*--------------------------------------------------------------------------------------------------------------------*/
1950 
1951 /*
1952  * Interface to fast scalable reduce methods routines
1953  */
1954 
1955 // keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions;
1956 // another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then)
1957 // AT: which solution is better?
1958 #define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
1959  ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
1960 
1961 #define __KMP_GET_REDUCTION_METHOD(gtid) \
1962  ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
1963 
1964 // description of the packed_reduction_method variable: look at the macros in kmp.h
1965 
1966 
1967 // used in a critical section reduce block
1968 static __forceinline void
1969 __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1970 
1971  // this lock was visible to a customer and to the thread profiler as a serial overhead span
1972  // (although it's used for an internal purpose only)
1973  // why was it visible in previous implementation?
1974  // should we keep it visible in new reduce block?
1975  kmp_user_lock_p lck;
1976 
1977  // We know that the fast reduction code is only emitted by Intel compilers
1978  // with 32 byte critical sections. If there isn't enough space, then we
1979  // have to use a pointer.
1980  if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
1981  lck = (kmp_user_lock_p)crit;
1982  }
1983  else {
1984  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1985  }
1986  KMP_DEBUG_ASSERT( lck != NULL );
1987 
1988  if ( __kmp_env_consistency_check )
1989  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1990 
1991  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1992 }
1993 
1994 // used in a critical section reduce block
1995 static __forceinline void
1996 __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1997 
1998  kmp_user_lock_p lck;
1999 
2000  // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2001  // sections. If there isn't enough space, then we have to use a pointer.
2002  if ( __kmp_base_user_lock_size > 32 ) {
2003  lck = *( (kmp_user_lock_p *) crit );
2004  KMP_ASSERT( lck != NULL );
2005  } else {
2006  lck = (kmp_user_lock_p) crit;
2007  }
2008 
2009  if ( __kmp_env_consistency_check )
2010  __kmp_pop_sync( global_tid, ct_critical, loc );
2011 
2012  __kmp_release_user_lock_with_checks( lck, global_tid );
2013 
2014 } // __kmp_end_critical_section_reduce_block
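/*
 * Editorial sketch (not part of the original source): an illustrative view of the
 * two ways the helpers above interpret the user-visible kmp_critical_name storage.
 * If the base lock fits (the 32-byte / INTEL_CRITICAL_SIZE case), the blob is used
 * directly as the lock; otherwise its first word holds a pointer obtained from
 * __kmp_get_critical_section_ptr().  The union below is purely hypothetical.
 */
typedef union example_crit_storage {
    kmp_critical_name inline_lock;   /* small base lock: used directly as the lock  */
    kmp_user_lock_p   lock_ptr;      /* large base lock: pointer to allocated lock  */
} example_crit_storage_t;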
2015 
2016 
2017 /* 2.a.i. Reduce Block without a terminating barrier */
2031 kmp_int32
2032 __kmpc_reduce_nowait(
2033  ident_t *loc, kmp_int32 global_tid,
2034  kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2035  kmp_critical_name *lck ) {
2036 
2037  int retval;
2038  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2039 
2040  KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2041 
2042  // why do we need this initialization here at all?
2043  // A reduction clause cannot be used as a stand-alone directive.
2044 
2045  // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
2046  // possible false-positive race detection by the thread checker ???
2047  if( ! TCR_4( __kmp_init_parallel ) )
2048  __kmp_parallel_initialize();
2049 
2050  // check correctness of reduce block nesting
2051  if ( __kmp_env_consistency_check )
2052  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2053 
2054  // it would be better to check the assertion ASSERT( thr_state == THR_WORK_STATE )
2055 
2056  // the packed_reduction_method value will be reused by the __kmp_end_reduce* functions, so it must be kept in a variable
2057  // the variable should be a construct-specific or thread-specific property, not a team-specific one
2058  // (a thread can reach the next reduce block on the next construct, and the reduce method may differ there)
2059  // the ident_t "loc" parameter could serve as a construct-specific property (but what if loc == 0?)
2060  // (both a construct-specific and a team-specific variable would be shared, so unnecessary extra syncs would be needed)
2061  // a thread-specific variable avoids both issues above (next construct and extra syncs)
2062  // a thread-specific "th_local.reduction_method" variable is used currently
2063  // each thread executes the 'determine' and 'set' lines (no need to restrict this to one thread; that would require unnecessary extra syncs)
2064 
2065  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2066  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2067 
2068  if( packed_reduction_method == critical_reduce_block ) {
2069 
2070  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2071  retval = 1;
2072 
2073  } else if( packed_reduction_method == empty_reduce_block ) {
2074 
2075  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2076  retval = 1;
2077 
2078  } else if( packed_reduction_method == atomic_reduce_block ) {
2079 
2080  retval = 2;
2081 
2082  // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2083  // (this is not ideal: the checking block has already been closed by this 'pop',
2084  // but the atomic operation has not executed yet; it executes slightly later, literally on the next instruction)
2085  if ( __kmp_env_consistency_check )
2086  __kmp_pop_sync( global_tid, ct_reduce, loc );
2087 
2088  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2089 
2090  //AT: performance issue: a real barrier here
2091  //AT: (if the master is slow, the other threads are blocked here waiting for the master to arrive and release them)
2092  //AT: (this is not what a customer would expect when specifying the NOWAIT clause)
2093  //AT: (specifying NOWAIT won't improve performance here, which will be confusing to a customer)
2094  //AT: another implementation of *barrier_gather*nowait() (or some other design) might be faster
2095  // and more in line with the sense of NOWAIT
2096  //AT: TO DO: run the EPCC benchmark and compare times
2097 
2098  // this barrier should be invisible to a customer and to the thread profiler
2099  // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
2100  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2101  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2102 
2103  // all workers except the master should do this pop here
2104  // ( none of the other workers will reach __kmpc_end_reduce_nowait() )
2105  if ( __kmp_env_consistency_check ) {
2106  if( retval == 0 ) {
2107  __kmp_pop_sync( global_tid, ct_reduce, loc );
2108  }
2109  }
2110 
2111  } else {
2112 
2113  // should never reach this block
2114  KMP_ASSERT( 0 ); // "unexpected method"
2115 
2116  }
2117 
2118  KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2119 
2120  return retval;
2121 }
2122 
2131 void
2132 __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2133 
2134  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2135 
2136  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2137 
2138  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2139 
2140  if( packed_reduction_method == critical_reduce_block ) {
2141 
2142  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2143 
2144  } else if( packed_reduction_method == empty_reduce_block ) {
2145 
2146  // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2147 
2148  } else if( packed_reduction_method == atomic_reduce_block ) {
2149 
2150  // neither the master nor the other workers should get here
2151  // (code gen does not generate this call in case 2: atomic reduce block)
2152  // actually it would be better to remove this else-if entirely;
2153  // after removal this value would be caught by the 'else' branch and would assert
2154 
2155  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2156 
2157  // only master gets here
2158 
2159  } else {
2160 
2161  // should never reach this block
2162  KMP_ASSERT( 0 ); // "unexpected method"
2163 
2164  }
2165 
2166  if ( __kmp_env_consistency_check )
2167  __kmp_pop_sync( global_tid, ct_reduce, loc );
2168 
2169  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2170 
2171  return;
2172 }
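/*
 * Editorial sketch (not part of the original source): the call pattern a compiler
 * typically emits around __kmpc_reduce_nowait() / __kmpc_end_reduce_nowait(), based
 * on the return-value convention visible above: 1 = this thread finalizes the
 * reduction (critical section, serial team, or tree master) and must call
 * __kmpc_end_reduce_nowait(); 2 = atomic path, the end function is NOT called;
 * 0 = nothing to do (the data was already combined by the tree barrier).
 * The combiner and variable names below are illustrative only.
 */
static void
example_add_func( void *lhs, void *rhs )     /* hypothetical reduce_func: lhs += rhs */
{
    *(double *) lhs += *(double *) rhs;
}

static void
example_reduce_nowait( ident_t *loc, kmp_int32 gtid,
                       double *shared_sum, double private_sum,
                       kmp_critical_name *crit )
{
    switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof( double ),
                                   &private_sum, example_add_func, crit ) ) {
        case 1:                                 /* finalize the reduction here */
            *shared_sum += private_sum;
            __kmpc_end_reduce_nowait( loc, gtid, crit );
            break;
        case 2:                                 /* atomic path: no end call emitted */
            /* would be an atomic update in real generated code */
            *shared_sum += private_sum;
            break;
        default:                                /* 0: already combined by the barrier */
            break;
    }
}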
2173 
2174 /* 2.a.ii. Reduce Block with a terminating barrier */
2175 
2189 kmp_int32
2190 __kmpc_reduce(
2191  ident_t *loc, kmp_int32 global_tid,
2192  kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2193  void (*reduce_func)(void *lhs_data, void *rhs_data),
2194  kmp_critical_name *lck )
2195 {
2196  int retval;
2197  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2198 
2199  KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2200 
2201  // why do we need this initialization here at all?
2202  // A reduction clause cannot be a stand-alone directive.
2203 
2204  // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
2205  // possible false-positive race detection by the thread checker ???
2206  if( ! TCR_4( __kmp_init_parallel ) )
2207  __kmp_parallel_initialize();
2208 
2209  // check correctness of reduce block nesting
2210  if ( __kmp_env_consistency_check )
2211  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2212 
2213  // it would be better to check the assertion ASSERT( thr_state == THR_WORK_STATE )
2214 
2215  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2216  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2217 
2218  if( packed_reduction_method == critical_reduce_block ) {
2219 
2220  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2221  retval = 1;
2222 
2223  } else if( packed_reduction_method == empty_reduce_block ) {
2224 
2225  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2226  retval = 1;
2227 
2228  } else if( packed_reduction_method == atomic_reduce_block ) {
2229 
2230  retval = 2;
2231 
2232  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2233 
2234  //case tree_reduce_block:
2235  // this barrier should be visible to a customer and to the thread profiler
2236  // (it's a terminating barrier on constructs if NOWAIT not specified)
2237  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2238  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2239 
2240  // all workers except the master should do this pop here
2241  // ( no worker other than the master will enter __kmpc_end_reduce() )
2242  if ( __kmp_env_consistency_check ) {
2243  if( retval == 0 ) { // 0: all other workers; 1: master
2244  __kmp_pop_sync( global_tid, ct_reduce, loc );
2245  }
2246  }
2247 
2248  } else {
2249 
2250  // should never reach this block
2251  KMP_ASSERT( 0 ); // "unexpected method"
2252 
2253  }
2254 
2255  KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2256 
2257  return retval;
2258 }
2259 
2269 void
2270 __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2271 
2272  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2273 
2274  KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2275 
2276  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2277 
2278  // this barrier should be visible to a customer and to the thread profiler
2279  // (it's a terminating barrier on constructs if NOWAIT not specified)
2280 
2281  if( packed_reduction_method == critical_reduce_block ) {
2282 
2283  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2284 
2285  // TODO: implicit barrier: should be exposed
2286  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2287 
2288  } else if( packed_reduction_method == empty_reduce_block ) {
2289 
2290  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2291 
2292  // TODO: implicit barrier: should be exposed
2293  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2294 
2295  } else if( packed_reduction_method == atomic_reduce_block ) {
2296 
2297  // TODO: implicit barrier: should be exposed
2298  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2299 
2300  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2301 
2302  // only master executes here (master releases all other workers)
2303  __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2304 
2305  } else {
2306 
2307  // should never reach this block
2308  KMP_ASSERT( 0 ); // "unexpected method"
2309 
2310  }
2311 
2312  if ( __kmp_env_consistency_check )
2313  __kmp_pop_sync( global_tid, ct_reduce, loc );
2314 
2315  KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2316 
2317  return;
2318 }
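/*
 * Editorial sketch (not part of the original source): the blocking variant differs
 * from the nowait pattern shown earlier in that code gen also calls __kmpc_end_reduce()
 * on the atomic path (return value 2), since that call supplies the terminating
 * barrier; on the tree path only the master (return value 1) enters __kmpc_end_reduce().
 * Reuses the hypothetical example_add_func combiner from the previous sketch.
 */
static void
example_reduce_blocking( ident_t *loc, kmp_int32 gtid,
                         double *shared_sum, double private_sum,
                         kmp_critical_name *crit )
{
    kmp_int32 rc = __kmpc_reduce( loc, gtid, 1, sizeof( double ),
                                  &private_sum, example_add_func, crit );
    if ( rc == 1 ) {                            /* critical / serial / tree master */
        *shared_sum += private_sum;
        __kmpc_end_reduce( loc, gtid, crit );
    } else if ( rc == 2 ) {                     /* atomic path */
        /* would be an atomic update in real generated code */
        *shared_sum += private_sum;
        __kmpc_end_reduce( loc, gtid, crit );   /* still called: it performs the barrier */
    }
    /* rc == 0: worker already released by the barrier inside __kmpc_reduce() */
}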
2319 
2320 #undef __KMP_GET_REDUCTION_METHOD
2321 #undef __KMP_SET_REDUCTION_METHOD
2322 
2323 /*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2324 
2325 kmp_uint64
2326 __kmpc_get_taskid() {
2327 
2328  #if OMP_30_ENABLED
2329 
2330  kmp_int32 gtid;
2331  kmp_info_t * thread;
2332 
2333  gtid = __kmp_get_gtid();
2334  if ( gtid < 0 ) {
2335  return 0;
2336  }; // if
2337  thread = __kmp_thread_from_gtid( gtid );
2338  return thread->th.th_current_task->td_task_id;
2339 
2340  #else
2341 
2342  return 0;
2343 
2344  #endif
2345 
2346 } // __kmpc_get_taskid
2347 
2348 
2349 kmp_uint64
2350 __kmpc_get_parent_taskid() {
2351 
2352  #if OMP_30_ENABLED
2353 
2354  kmp_int32 gtid;
2355  kmp_info_t * thread;
2356  kmp_taskdata_t * parent_task;
2357 
2358  gtid = __kmp_get_gtid();
2359  if ( gtid < 0 ) {
2360  return 0;
2361  }; // if
2362  thread = __kmp_thread_from_gtid( gtid );
2363  parent_task = thread->th.th_current_task->td_parent;
2364  return ( parent_task == NULL ? 0 : parent_task->td_task_id );
2365 
2366  #else
2367 
2368  return 0;
2369 
2370  #endif
2371 
2372 } // __kmpc_get_parent_taskid
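/*
 * Editorial sketch (not part of the original source): the two queries above can be
 * called directly, e.g. from tracing code, to identify the current task.  Both
 * return 0 when the calling thread is not a registered OpenMP thread or when
 * OMP 3.0 task support is compiled out.
 */
static void
example_trace_current_task( void )
{
    kmp_uint64 task_id   = __kmpc_get_taskid();
    kmp_uint64 parent_id = __kmpc_get_parent_taskid();

    KA_TRACE( 20, ( "example: task %llu (parent %llu)\n",
                    (unsigned long long) task_id, (unsigned long long) parent_id ) );
}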
2373 
2374 void __kmpc_place_threads(int nC, int nT, int nO)
2375 {
2376 #if KMP_MIC
2377  if ( ! __kmp_init_serial ) {
2378  __kmp_serial_initialize();
2379  }
2380  __kmp_place_num_cores = nC;
2381  __kmp_place_num_threads_per_core = nT;
2382  __kmp_place_core_offset = nO;
2383 #endif
2384 }
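/*
 * Editorial sketch (not part of the original source): __kmpc_place_threads() only
 * takes effect in KMP_MIC builds; the interpretation of the arguments below follows
 * the names of the globals set above (number of cores, threads per core, core offset)
 * and is an assumption, not a documented contract.
 */
static void
example_place_threads( void )
{
    /* e.g. use 60 cores with 4 hardware threads each, skipping the first core */
    __kmpc_place_threads( 60, 4, 1 );
}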
2385 
2386 // end of file //
2387 