#include "kmp_error.h"

#define MAX_MESSAGE 512

#define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ
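
/* Simple spin helpers used by the wait loops below.  In this build
   __kmp_static_delay only asserts on its argument (any real delay is
   presumably provided elsewhere); __kmp_static_yield is its yielding
   counterpart. */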

__kmp_static_delay( int arg )
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT( arg != 0 );
#else
    KMP_ASSERT( arg >= 0 );
#endif

__kmp_static_yield( int arg )

in_parallel_context( kmp_team_t *team )
    return ! team -> t.t_serialized;
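
/* Ordered-entry hook installed in th_deo_fcn for taskq: in a parallel region
   it waits (KMP_WAIT_YIELD) until the queue's serving counter reaches this
   thunk's task number, enforcing ordered execution of taskq tasks. */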

__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmpc_task_queue_t *taskq;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;

        taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;

        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
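
/* Ordered-exit hook installed in th_dxo_fcn for taskq: bumps the queue's
   tq_tasknum_serving past this thunk's task number so the next ordered task
   may proceed. */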

__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;

        tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
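
/* Used after an ordered thunk has executed (see __kmp_execute_task_from_queue):
   waits until the queue's serving counter catches up to this thunk's task
   number, then advances it past it. */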

__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
    kmpc_task_queue_t *taskq;

    my_token = thunk -> th_tasknum;

    taskq = thunk -> th.th_shareds -> sv_queue;

    if (taskq->tq_tasknum_serving <= my_token) {
        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
        taskq->tq_tasknum_serving = my_token + 1;
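
/* Debug-only dump helpers (driven by KF_DUMP): print the TQF_* flag bits, a
   single thunk, a thread's thunk stack, one task queue, and the whole queue
   tree, respectively. */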

__kmp_dump_TQF(kmp_int32 flags)
    if (flags & TQF_IS_ORDERED)
        __kmp_printf("ORDERED ");
    if (flags & TQF_IS_LASTPRIVATE)
        __kmp_printf("LAST_PRIV ");
    if (flags & TQF_IS_NOWAIT)
        __kmp_printf("NOWAIT ");
    if (flags & TQF_HEURISTICS)
        __kmp_printf("HEURIST ");
    if (flags & TQF_INTERFACE_RESERVED1)
        __kmp_printf("RESERV1 ");
    if (flags & TQF_INTERFACE_RESERVED2)
        __kmp_printf("RESERV2 ");
    if (flags & TQF_INTERFACE_RESERVED3)
        __kmp_printf("RESERV3 ");
    if (flags & TQF_INTERFACE_RESERVED4)
        __kmp_printf("RESERV4 ");
    if (flags & TQF_IS_LAST_TASK)
        __kmp_printf("LAST_TASK ");
    if (flags & TQF_TASKQ_TASK)
        __kmp_printf("TASKQ_TASK ");
    if (flags & TQF_RELEASE_WORKERS)
        __kmp_printf("RELEASE ");
    if (flags & TQF_ALL_TASKS_QUEUED)
        __kmp_printf("ALL_QUEUED ");
    if (flags & TQF_PARALLEL_CONTEXT)
        __kmp_printf("PARALLEL ");
    if (flags & TQF_DEALLOCATED)
        __kmp_printf("DEALLOC ");
    if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
        __kmp_printf("(NONE)");

__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] == thunk ) {
            __kmp_printf("[%i] ", i);

    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ", thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ", thunk->th_status);
    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
    __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);

__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
    __kmp_printf(" Thunk stack for T#%d: ", thread_num);

    for (th = thunk; th != NULL; th = th->th_encl_thunk )
        __kmp_printf("%p ", th);

__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
    kmpc_task_queue_t *taskq;

    __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if ( __kmp_env_consistency_check ) {
        __kmp_printf(" tq_loc : ");

    __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
    __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
    __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
    __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
    __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);

    __kmp_printf(" tq_shareds : ");
    for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
        __kmp_printf("%p ", queue->tq_shareds[i].ai_data);

    __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
    __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);

    __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
    __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
    __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);

    __kmp_printf(" tq_free_thunks : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
        __kmp_printf("%p ", thunk);

    __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
    __kmp_printf(" tq_head : %d\n", queue->tq_head);
    __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
    __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
    __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
    __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags);

    __kmp_printf(" tq_th_thunks : ");
    for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);

    __kmp_printf(" Queue slots:\n");

    for ( count = 0; count < queue->tq_nfull; ++count ) {
        __kmp_printf("(%d)", qs);
        __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
        qs = (qs+1) % queue->tq_nslots;

    if (queue->tq_taskq_slot != NULL) {
        __kmp_printf(" TaskQ slot:\n");
        __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );

    __kmp_printf(" Taskq freelist: ");

    for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
        __kmp_printf("%p ", taskq);

    __kmp_printf("\n\n");

__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue == NULL)

    for (i=0; i<level; i++)

    __kmp_printf("%p", curr_queue);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
            __kmp_printf(" [%i]", i);

    qs = curr_queue->tq_tail;

    for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
        __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
        qs = (qs+1) % curr_queue->tq_nslots;

    if (curr_queue->tq_first_child) {

    if (curr_queue->tq_first_child) {
        for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
             queue != NULL;
             queue = queue->tq_next_child) {
            __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );

__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
    __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

    __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
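
/* Cache-aligned allocator for taskq data structures: over-allocates by a
   pointer plus a cache line, rounds the returned address up to a CACHE_LINE
   boundary, and stashes the original malloc pointer just below the returned
   block so __kmpc_taskq_free can recover and free it. */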

__kmp_taskq_allocate( size_t size, kmp_int32 global_tid )
    void *addr, *orig_addr;

    KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );

    bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
    orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
#else
    KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
    orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
#endif /* THREAD_ALLOC_FOR_TASKQ */

    KMP_FATAL( OutOfHeapMemory );

    if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
        KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
        addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));

    (* (void **) addr) = orig_addr;

    KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
                   orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
                   (int) size, global_tid ));

    return ( ((void **) addr) + 1 );

__kmpc_taskq_free( void *p, kmp_int32 global_tid )
    KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );

    KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));

#ifdef THREAD_ALLOC_FOR_TASKQ
    __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
#else
    KMP_INTERNAL_FREE( *( ((void **) p)-1) );
#endif /* THREAD_ALLOC_FOR_TASKQ */
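
/* Builds a kmpc_task_queue_t: reuses a queue from tq->tq_freelist when one is
   available, otherwise allocates a fresh one, then carves out the thunk space,
   the circular slot array, the per-thread shared-variable copies, and the
   per-thread outstanding-thunk counters. */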

static kmpc_task_queue_t *
__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
                    kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
                    size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
    kmpc_task_queue_t *new_queue;
    kmpc_aligned_shared_vars_t *shared_var_array;
    char *shared_var_storage;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

    if( tq->tq_freelist ) {
        new_queue = tq -> tq_freelist;
        tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;

        KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

        new_queue->tq_flags = 0;

        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
    }
    else {
        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );

        new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
        new_queue->tq_flags = 0;
    }

    sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
    pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
    new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
    *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

    new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

    for (i = 0; i < (nthunks - 2); i++) {
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;

    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;

    __kmp_init_lock( & new_queue->tq_link_lck );
    __kmp_init_lock( & new_queue->tq_free_thunks_lck );
    __kmp_init_lock( & new_queue->tq_queue_lck );

    bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
    new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );

    sizeof_shareds += sizeof(kmpc_task_queue_t *);
    sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

    bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
    shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);

    bytes = nshareds * sizeof_shareds;
    shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);

    for (i=0; i<nshareds; i++) {
        shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
        shared_var_array[i].ai_data->sv_queue = new_queue;

    new_queue->tq_shareds = shared_var_array;

    bytes = nproc * sizeof(kmpc_aligned_int32_t);
    new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
    new_queue->tq_nproc = nproc;

    for (i=0; i<nproc; i++)
        new_queue->tq_th_thunks[i].ai_data = 0;
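
/* Releases everything __kmp_alloc_taskq set up, resets the queue's fields and
   link pointers, and pushes the queue back onto tq->tq_freelist under
   tq_freelist_lck. */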

__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
    __kmpc_taskq_free(p->tq_thunk_space, global_tid);
    __kmpc_taskq_free(p->tq_queue, global_tid);

    __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);

    __kmpc_taskq_free(p->tq_shareds, global_tid);

    p->tq_first_child = NULL;
    p->tq_next_child = NULL;
    p->tq_prev_child = NULL;
    p->tq_ref_count = -10;
    p->tq_shareds = NULL;
    p->tq_tasknum_queuing = 0;
    p->tq_tasknum_serving = 0;

    p->tq_thunk_space = NULL;
    p->tq_taskq_slot = NULL;
    p->tq_free_thunks = NULL;

    for (i=0; i<p->tq_nproc; i++)
        p->tq_th_thunks[i].ai_data = 0;

    if ( __kmp_env_consistency_check )

    KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
    p->tq_flags = TQF_DEALLOCATED;

    __kmpc_taskq_free(p->tq_th_thunks, global_tid);

    __kmp_destroy_lock(& p->tq_link_lck);
    __kmp_destroy_lock(& p->tq_queue_lck);
    __kmp_destroy_lock(& p->tq_free_thunks_lck);

    p->tq_th_thunks = NULL;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
    p->tq.tq_next_free = tq->tq_freelist;

    __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
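
/* Thunks are recycled through a per-queue singly linked free list protected by
   tq_free_thunks_lck; __kmp_alloc_thunk pops from it and __kmp_free_thunk
   pushes back, marking the thunk TQF_DEALLOCATED. */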

static kmpc_thunk_t *
__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    fl = queue->tq_free_thunks;

    KMP_DEBUG_ASSERT (fl != NULL);

    queue->tq_free_thunks = fl->th.th_next_free;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);

__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
    p->th_encl_thunk = 0;

    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    p->th.th_next_free = queue->tq_free_thunks;
    queue->tq_free_thunks = p;

    p->th_flags = TQF_DEALLOCATED;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
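
/* Appends a thunk at tq_head of the circular slot array under tq_queue_lck.
   In a parallel region the return value reports whether the queue just became
   full; the first enqueue also clears the global TQF_RELEASE_WORKERS flag set
   by __kmpc_taskq (presumably the point at which waiting workers are released). */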

__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
    __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);

    queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

    if (queue->tq_head >= queue->tq_nslots)

    ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

    __kmp_release_lock(& queue->tq_queue_lck, global_tid);

    if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {

        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
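
/* Removes the thunk at tq_tail.  For a non-root queue in a parallel region the
   queue's reference count is bumped first so the queue cannot be freed while
   the caller still holds the thunk; per-thread outstanding thunks are tracked
   against __KMP_TASKQ_THUNKS_PER_TH. */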

static kmpc_thunk_t *
__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
    int tid = __kmp_tid_from_gtid( global_tid );

    KMP_DEBUG_ASSERT (queue->tq_nfull > 0);

    if (queue->tq.tq_parent != NULL && in_parallel) {
        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                           __LINE__, global_tid, queue, ct));

    pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

    if (queue->tq_tail >= queue->tq_nslots)

    queue->tq_th_thunks[tid].ai_data++;

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));

    KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
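
/* Picks work from a single queue under tq_queue_lck: prefer the pending TASKQ
   task in tq_taskq_slot when the queue is below its high-water mark, otherwise
   dequeue a regular thunk, holding back the last task of a lastprivate taskq
   until TQF_IS_LAST_TASK can be set on it. */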

static kmpc_thunk_t *
__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
    kmpc_thunk_t *pt = NULL;
    int tid = __kmp_tid_from_gtid( global_tid );

    if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

            if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
                pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
                queue->tq_taskq_slot = NULL;

            else if (queue->tq_nfull == 0 ||
                     queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {

            else if (queue->tq_nfull > 1) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);

            else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);

            else if (queue->tq_flags & TQF_IS_LAST_TASK) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
                pt->th_flags |= TQF_IS_LAST_TASK;

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
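
/* Work stealing, downward: walks the children of curr_queue (and recurses into
   their subtrees), taking a reference on each child while it is examined so
   the child cannot be removed from the tree mid-visit. */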

static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
    kmpc_thunk_t *pt = NULL;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

        while (queue != NULL) {
            kmpc_task_queue_t *next;

            ct = ++(queue->tq_ref_count);
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                               __LINE__, global_tid, queue, ct));

            pt = __kmp_find_task_in_queue (global_tid, queue);

                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                                   __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

            pt = __kmp_find_task_in_descendant_queue (global_tid, queue);

                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                                   __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                               __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
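
/* Work stealing, upward: walks from curr_queue's parent toward the root
   looking for a queue with work, and finally falls back to searching the
   descendants of the root queue. */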

static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
    kmpc_task_queue_t *queue;

    if (curr_queue->tq.tq_parent != NULL) {
        queue = curr_queue->tq.tq_parent;

        while (queue != NULL) {
            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = ++(queue->tq_ref_count);
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                                   __LINE__, global_tid, queue, ct));

            pt = __kmp_find_task_in_queue (global_tid, queue);

            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                                   __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                                   __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

            queue = queue->tq.tq_parent;

            __kmp_release_lock(& queue->tq_link_lck, global_tid);

    pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );

__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
    for (i=0; i<queue->tq_nproc; i++) {
        if (queue->tq_th_thunks[i].ai_data != 0)

__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
    return (queue->tq_first_child != NULL);
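
/* Unlinks an exhausted queue from its parent's child list, waits for its
   reference count to drain to 1, sanity-checks that every thunk has been
   returned to the free list, and then frees the queue. */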

__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
    kmpc_thunk_t *thunk;

    KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));

    KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);

        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

    if (queue->tq_prev_child != NULL)
        queue->tq_prev_child->tq_next_child = queue->tq_next_child;
    if (queue->tq_next_child != NULL)
        queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
    if (queue->tq.tq_parent->tq_first_child == queue)
        queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

    queue->tq_prev_child = NULL;
    queue->tq_next_child = NULL;

        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
                           __LINE__, global_tid, queue, queue->tq_ref_count));

        while (queue->tq_ref_count > 1) {
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);

            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
                       __LINE__, global_tid, queue));

    KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
    KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

    for (i=0; i<queue->tq_nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);

    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)

    KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));

    __kmp_free_taskq ( tq, queue, TRUE, global_tid );

    KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue != NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

        while (queue != NULL) {
            kmpc_task_queue_t *next;
            int ct = ++(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                               __LINE__, global_tid, queue, ct));

            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );

                if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
                    __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {

                    if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
                        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                            queue->tq_flags |= TQF_DEALLOCATED;
                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

                            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );

                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                               __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
    kmpc_task_queue_t *next_child;

    queue = (kmpc_task_queue_t *) queue->tq_first_child;

    while (queue != NULL) {
        __kmp_remove_all_child_taskq ( tq, global_tid, queue );

        next_child = queue->tq_next_child;
        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
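
/* Runs one thunk: binds the right shared-variables copy, pushes the thunk on
   the per-thread thunk stack, invokes th_task, then pops the stack, handles
   ordered completion, recycles the thunk, and drops the reference taken when
   the thunk was dequeued from a non-root queue. */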

__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
    kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
    kmp_int32 tid = __kmp_tid_from_gtid( global_tid );

    KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
    KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
        thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;

        if ( __kmp_env_consistency_check ) {
            __kmp_push_workshare( global_tid,
                                  (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,

        if ( __kmp_env_consistency_check )
            __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );

    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = thunk;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
    thunk->th_task (global_tid, thunk);
    KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        if ( __kmp_env_consistency_check )
            __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,

        tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
        thunk->th_encl_thunk = NULL;
        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

        if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
            __kmp_taskq_check_ordered(global_tid, thunk);

        __kmp_free_thunk (queue, thunk, in_parallel, global_tid);

        KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

            KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

            KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
                            global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));

            queue->tq_th_thunks[tid].ai_data--;

        if (queue->tq.tq_parent != NULL && in_parallel) {
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            ct = --(queue->tq_ref_count);
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                               __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );
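
/* Compiler interface: start of a #pragma omp taskq.  Creates the queue, sizes
   it from the team (2*nproc slots in parallel, 1 when serialized), links it
   into the queue tree, and returns the TASKQ-task thunk that the compiler-
   generated code later hands back via __kmpc_taskq_task. */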

__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
              size_t sizeof_thunk, size_t sizeof_shareds,
              kmp_int32 flags, kmpc_shared_vars_t **shareds )
    kmp_int32 nslots, nthunks, nshareds, nproc;
    kmpc_task_queue_t *new_queue, *curr_queue;
    kmpc_thunk_t *new_taskq_thunk;

    KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));

    th = __kmp_threads[ global_tid ];
    team = th -> th.th_team;
    tq = & team -> t.t_taskq;
    nproc = team -> t.t_nproc;
    tid = __kmp_tid_from_gtid( global_tid );

    in_parallel = in_parallel_context( team );

    if( ! tq->tq_root ) {
        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;

        if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {

        *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;

        KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    if( tq->tq_curr_thunk_capacity < nproc ) {
        if (tq->tq_curr_thunk)
            __kmp_free(tq->tq_curr_thunk);

            __kmp_init_lock( & tq->tq_freelist_lck );

        tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
        tq -> tq_curr_thunk_capacity = nproc;

        tq->tq_global_flags = TQF_RELEASE_WORKERS;

    nslots = (in_parallel) ? (2 * nproc) : 1;

    nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;

    nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;

    new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
                                    sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );

    new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

    new_queue->tq_tasknum_queuing = 0;
    new_queue->tq_tasknum_serving = 0;
    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;

    new_queue->tq_taskq_slot = NULL;
    new_queue->tq_nslots = nslots;
    new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
    new_queue->tq_nfull = 0;
    new_queue->tq_head = 0;
    new_queue->tq_tail = 0;
    new_queue->tq_loc = loc;

    if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
        new_queue->tq_tasknum_serving = 1;

        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;

    *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;

    new_taskq_thunk->th.th_shareds = *shareds;
    new_taskq_thunk->th_task = taskq_task;
    new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
    new_taskq_thunk->th_status = 0;

    KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

    if( ! tq->tq_root ) {
        new_queue->tq.tq_parent = NULL;
        new_queue->tq_first_child = NULL;
        new_queue->tq_next_child = NULL;
        new_queue->tq_prev_child = NULL;
        new_queue->tq_ref_count = 1;
        tq->tq_root = new_queue;

        curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
        new_queue->tq.tq_parent = curr_queue;
        new_queue->tq_first_child = NULL;
        new_queue->tq_prev_child = NULL;
        new_queue->tq_ref_count = 1;

        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
                           __LINE__, global_tid, new_queue, new_queue->tq_ref_count));

        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;

        if (curr_queue->tq_first_child != NULL)
            curr_queue->tq_first_child->tq_prev_child = new_queue;

        curr_queue->tq_first_child = new_queue;

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

        new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
        tq->tq_curr_thunk[tid] = new_taskq_thunk;

        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

        new_taskq_thunk->th_encl_thunk = 0;
        new_queue->tq.tq_parent = NULL;
        new_queue->tq_first_child = NULL;
        new_queue->tq_next_child = NULL;
        new_queue->tq_prev_child = NULL;
        new_queue->tq_ref_count = 1;

    KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
    KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));

    KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));

    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));

    KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));

    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

    if ( __kmp_env_consistency_check )
        __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );

    KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    return new_taskq_thunk;
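
/* Compiler interface: end of a taskq.  Threads loop here executing work from
   this queue and stealing from descendant/ancestor queues until all tasks are
   queued and finished; the outermost master then tears the queue tree down,
   while a serialized taskq just drains its single remaining slot. */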

__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
    kmp_int32 is_outermost;
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *thunk;

    KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));

    tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;

    KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
    is_outermost = (queue == tq->tq_root);
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
        if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
            tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

            __kmp_end_split_barrier( bs_plain_barrier, global_tid );

    KMP_INIT_YIELD(spins);

    while ( (queue->tq_nfull == 0)
            && (queue->tq_taskq_slot == NULL)
            && (! __kmp_taskq_has_any_children(queue) )
            && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) ) ) {
        __kmp_static_delay( 1 );
        KMP_YIELD_WHEN( TRUE, spins );

    while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
            && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL ) {
        KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

    if ( (__kmp_taskq_has_any_children(queue))
         && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL ) {
        KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                      thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));

        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

    } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
              || (queue->tq_nfull != 0) );

    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));

    while ( (!__kmp_taskq_tasks_finished(queue))
            && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL ) {
        KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                      thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

    KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));

    if (!is_outermost) {

        if (queue->tq_flags & TQF_IS_NOWAIT) {
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            queue->tq_ref_count--;
            KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

        __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );

        KMP_INIT_YIELD(spins);

        while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
            thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );

            if (thunk != NULL) {
                KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
                              thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

            KMP_YIELD_WHEN( thunk == NULL, spins );

            __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
            queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_release_lock(& queue->tq_queue_lck, global_tid);

        if (taskq_thunk != NULL) {
            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );

        KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));

    KMP_INIT_YIELD(spins);

    while (!__kmp_taskq_tasks_finished(queue)) {
        thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

        if (thunk != NULL) {
            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                          thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

        KMP_YIELD_WHEN( thunk == NULL, spins );

    if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {

        __kmp_remove_all_child_taskq( tq, global_tid, queue );

        KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));

        KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

        for (i=0; i<nproc; i++) {
            KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);

        for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);

        KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

        for (i = 0; i < nproc; i++) {
            KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );

        tq -> tq_root = NULL;

        KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));

        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

        KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

    __kmp_end_split_barrier( bs_plain_barrier, global_tid );

    th = __kmp_threads[ global_tid ];

    th->th.th_dispatch->th_deo_fcn = 0;

    th->th.th_dispatch->th_dxo_fcn = 0;

    if (queue->tq_nfull > 0) {
        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        if (queue->tq_flags & TQF_IS_LAST_TASK) {
            thunk->th_flags |= TQF_IS_LAST_TASK;

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );

    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)

    KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);

    KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

    KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));

__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
    kmpc_task_queue_t *queue;

    KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
        thunk->th_tasknum = ++queue->tq_tasknum_queuing;

    if (!in_parallel && queue->tq_nfull > 0) {
        kmpc_thunk_t *prev_thunk;

        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );

    KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );

    KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));

__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
    kmpc_task_queue_t *queue;
    kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    int tid = __kmp_tid_from_gtid( global_tid );

    KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
    KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    queue = thunk->th.th_shareds->sv_queue;

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);

    KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);

    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    thunk->th_status = status;

    queue->tq_taskq_slot = thunk;

    KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));

__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
    kmpc_task_queue_t *queue;

    KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
    tid = __kmp_tid_from_gtid( global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

    KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
#else
    __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

    queue->tq_flags |= TQF_ALL_TASKS_QUEUED;

    __kmp_release_lock(& queue->tq_queue_lck, global_tid);
#endif

    if (thunk->th_flags & TQF_IS_LASTPRIVATE) {

        if (! in_parallel) {
            queue->tq_flags |= TQF_IS_LAST_TASK;

#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

        KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
#else
        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        queue->tq_flags |= TQF_IS_LAST_TASK;

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
#endif

    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));

__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *new_thunk;

    KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = taskq_thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
    new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
    new_thunk->th_encl_thunk = NULL;
    new_thunk->th_task = task;

    new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

    new_thunk->th_status = 0;

    KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));

    KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));

    KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));