#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static kmp_int32 kmp_node_id_seed = 0;
#endif

static void
__kmp_init_node ( kmp_depnode_t *node )
{
    node->dn.task = NULL;   // task pointer is filled in once the dependences have been processed
    node->dn.successors = NULL;
    __kmp_init_lock(&node->dn.lock);
    node->dn.nrefs = 1;     // initialization creates the first reference to the node
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
    node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
#endif
}
static inline kmp_depnode_t *
__kmp_node_ref ( kmp_depnode_t *node )
{
    KMP_TEST_THEN_INC32(&node->dn.nrefs);
    return node;
}
static inline void
__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
{
    if ( !node ) return;

    kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
    if ( n == 0 ) {
        KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,node);
#else
        __kmp_thread_free(thread,node);
#endif
    }
}
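// Note on lifetime management: dependence nodes are reference counted. Every pointer that is
// stored away (the last_out field of a hash entry, each element of a last_ins or successors
// list) is taken through __kmp_node_ref() and released through __kmp_node_deref(), which frees
// the node once the last reference is dropped.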
#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))
static void
__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
static const kmp_int32 kmp_dephash_log2 = 6;
static const kmp_int32 kmp_dephash_size = (1 << kmp_dephash_log2);
static inline kmp_int32
__kmp_dephash_hash ( kmp_intptr_t addr )
{
    return ((addr >> kmp_dephash_log2) ^ addr) % kmp_dephash_size;
}
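// Worked example (illustrative values only): for an address 0x1040 with kmp_dephash_log2 == 6,
// the bucket is ((0x1040 >> 6) ^ 0x1040) % 64 = (0x41 ^ 0x1040) % 64 = 0x1001 % 64 = 1, so all
// dependences on that address hash to bucket 1 of the 64-bucket table.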
static kmp_dephash_t *
__kmp_dephash_create ( kmp_info_t *thread )
{
    kmp_dephash_t *h;

    kmp_int32 size = kmp_dephash_size * sizeof(kmp_dephash_entry_t) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
    h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
#else
    h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
#endif

    // the bucket array lives right after the header in the same allocation
    h->buckets = (kmp_dephash_entry **)(h+1);

    for ( kmp_int32 i = 0; i < kmp_dephash_size; i++ )
        h->buckets[i] = 0;

    return h;
}
static void
__kmp_dephash_free ( kmp_info_t *thread, kmp_dephash_t *h )
{
    for ( kmp_int32 i=0; i < kmp_dephash_size; i++ ) {
        if ( h->buckets[i] ) {
            kmp_dephash_entry_t *next;
            for ( kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next ) {
                next = entry->next_in_bucket;
                __kmp_depnode_list_free(thread,entry->last_ins);
                __kmp_node_deref(thread,entry->last_out);
#if USE_FAST_MEMORY
                __kmp_fast_free(thread,entry);
#else
                __kmp_thread_free(thread,entry);
#endif
            }
        }
    }
#if USE_FAST_MEMORY
    __kmp_fast_free(thread,h);
#else
    __kmp_thread_free(thread,h);
#endif
}
static kmp_dephash_entry *
__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
{
    kmp_int32 bucket = __kmp_dephash_hash(addr);

    kmp_dephash_entry_t *entry;
    for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
        if ( entry->addr == addr ) break;

    if ( entry == NULL ) {
        // create a new entry; only the thread processing the dependences touches the hash,
        // so no locking is required here
#if USE_FAST_MEMORY
        entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
#else
        entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
#endif
        entry->addr = addr;
        entry->last_out = NULL;
        entry->last_ins = NULL;
        entry->next_in_bucket = h->buckets[bucket];
        h->buckets[bucket] = entry;
        if ( entry->next_in_bucket ) h->nconflicts++;
    }
    return entry;
}
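// Each hash entry tracks, for one dependence address, the node of the last task with an output
// dependence on it (last_out) and the list of tasks with input dependences seen since that
// output (last_ins). These two fields are what __kmp_process_deps consults to build the edges
// of the task dependence graph.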
static kmp_depnode_list_t *
__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
{
    kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
    new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
#else
    new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
#endif

    new_head->node = __kmp_node_ref(node);
    new_head->next = list;

    return new_head;
}
static void
__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
{
    kmp_depnode_list *next;

    for ( ; list ; list = next ) {
        next = list->next;

        __kmp_node_deref(thread,list->node);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,list);
#else
        __kmp_thread_free(thread,list);
#endif
    }
}
static inline void
__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink )
{
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
    kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink->dn.task);

    __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource,
                 sink->dn.id, task_sink->td_ident->psource);
#endif
}
template< bool filter >
static inline kmp_int32
__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                     bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_int32 npredecessors = 0;
    for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
        const kmp_depend_info_t * dep = &dep_list[i];

        KMP_DEBUG_ASSERT(dep->flags.in);

        if ( filter && dep->base_addr == 0 ) continue; // skip entries voided by the duplicate filter

        kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
        kmp_depnode_t *last_out = info->last_out;

        if ( dep->flags.out && info->last_ins ) {
            // an output dependence must wait for all previous input dependences on this address
            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
                kmp_depnode_t * indep = p->node;
                if ( indep->dn.task ) {
                    KMP_ACQUIRE_DEPNODE(gtid,indep);
                    if ( indep->dn.task ) {
                        __kmp_track_dependence(indep,node);
                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
                        npredecessors++;
                    }
                    KMP_RELEASE_DEPNODE(gtid,indep);
                }
            }

            __kmp_depnode_list_free(thread,info->last_ins);
            info->last_ins = NULL;
        }
        else if ( last_out && last_out->dn.task ) {
            // otherwise the task only depends on the last output dependence, if any
            KMP_ACQUIRE_DEPNODE(gtid,last_out);
            if ( last_out->dn.task ) {
                __kmp_track_dependence(last_out,node);
                last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
                npredecessors++;
            }
            KMP_RELEASE_DEPNODE(gtid,last_out);
        }

        if ( dep_barrier ) {
            // at a dependence barrier all previous outputs are guaranteed to have completed,
            // so the previous output node can be cleared
            __kmp_node_deref(thread,last_out);
            info->last_out = NULL;
        } else {
            if ( dep->flags.out ) {
                __kmp_node_deref(thread,last_out);
                info->last_out = __kmp_node_ref(node);
            } else
                info->last_ins = __kmp_add_node(thread, info->last_ins, node);
        }
    }
    return npredecessors;
}
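// The rules encoded above follow the usual OpenMP depend-clause semantics: an "out"/"inout"
// dependence waits for every task in last_ins (or, if there are none, for the last "out"),
// while an "in" dependence only waits for the last "out". For example, several tasks with
// depend(in: x) may run concurrently, but a later task with depend(out: x) becomes a successor
// of all of them.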
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
// returns true if the task has any outstanding dependences
static bool
__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash,
                   bool dep_barrier,
                   kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                   kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{
    int i;

    // filter duplicate addresses in dep_list: merge the flags and void the duplicate entries
    for ( i = 0; i < ndeps; i++ ) {
        if ( dep_list[i].base_addr != 0 )
            for ( int j = i+1; j < ndeps; j++ )
                if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
                    dep_list[i].flags.in |= dep_list[j].flags.in;
                    dep_list[i].flags.out |= dep_list[j].flags.out;
                    dep_list[j].base_addr = 0; // mark the j-th element as void
                }
    }

    // seed npredecessors with 1 so that no releasing task can queue this task before all
    // dependences have been processed
    node->dn.npredecessors = 1;

    int npredecessors;

    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps, dep_list);
    npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list);

    KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors);

    node->dn.task = task;

    // account for the initial fake predecessor; beyond this point the task may be queued
    // (and executed) by a releasing task
    npredecessors = KMP_TEST_THEN_DEC32(&node->dn.npredecessors) - 1;

    return npredecessors > 0 ? true : false;
}
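// The extra predecessor added at the start of __kmp_check_deps (npredecessors = 1) keeps
// releasing tasks from queueing this task while its dependences are still being processed; the
// final atomic decrement removes that sentinel and reveals whether any real predecessors remain.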
void
__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_depnode_t *node = task->td_depnode;

    if ( task->td_dephash )
        __kmp_dephash_free(thread,task->td_dephash);

    if ( !node ) return;

    KMP_ACQUIRE_DEPNODE(gtid,node);
    node->dn.task = NULL; // mark this task as finished so that no new dependences are added to it
    KMP_RELEASE_DEPNODE(gtid,node);

    kmp_depnode_list_t *next;
    for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
        kmp_depnode_t *successor = p->node;
        kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;

        // the successor can be queued once its last predecessor releases it; dn.task may be
        // NULL for the phantom node used by __kmpc_omp_wait_deps
        if ( npredecessors == 0 ) {
            if ( successor->dn.task )
                __kmpc_omp_task(NULL,gtid,successor->dn.task);
        }

        next = p->next;
        __kmp_node_deref(thread,p->node);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread,p);
#else
        __kmp_thread_free(thread,p);
#endif
    }

    __kmp_node_deref(thread,node);
}
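// __kmp_release_deps is expected to run when a task with dependences finishes: it clears
// dn.task so no new edges are added to the finished task, decrements the predecessor count of
// every successor, and queues any successor whose count reaches zero (successors with a NULL
// task, e.g. the node used by __kmpc_omp_wait_deps, are only counted down, not queued).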
// Schedule a non-thread-switchable task with dependences for execution
kmp_int32
__kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task,
                            kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                            kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * current_task = thread->th.th_current_task;

    bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;

    if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
        // the dependence hash is created lazily the first time a task with dependences is encountered
        if ( current_task->td_dephash == NULL )
            current_task->td_dephash = __kmp_dephash_create(thread);

#if USE_FAST_MEMORY
        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
#else
        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
#endif

        __kmp_init_node(node);
        KMP_TASK_TO_TASKDATA(new_task)->td_depnode = node;

        if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
                               ndeps, dep_list, ndeps_noalias, noalias_dep_list ) )
            return TASK_CURRENT_NOT_QUEUED;
    }

    return __kmpc_omp_task(loc_ref,gtid,new_task);
}
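/* Illustrative only: a compiler targeting this interface would typically lower a construct
   such as

       #pragma omp task depend(in: x) depend(out: y)
       { ... }

   into a call to __kmpc_omp_task_alloc followed by __kmpc_omp_task_with_deps with a
   two-element dep_list describing x (flags.in set) and y (flags.out set). The exact lowering
   depends on the compiler. */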
// Block the current task until all of its dependences are satisfied
void
__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid,
                       kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                       kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
{
    if ( ndeps == 0 && ndeps_noalias == 0 ) return;

    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_taskdata_t * current_task = thread->th.th_current_task;

    // dependences are not tracked in serial teams, so there is nothing to wait for
    if ( current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final )
        return;

    // if the dependence hash was never created, no task with dependences has been encountered yet
    if ( current_task->td_dephash == NULL ) return;

    kmp_depnode_t node;
    __kmp_init_node(&node);

    if ( !__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
                            ndeps, dep_list, ndeps_noalias, noalias_dep_list ))
        return;

    int thread_finished = FALSE;
    while ( node.dn.npredecessors > 0 ) {
        __kmp_execute_tasks( thread, gtid, (volatile kmp_uint32 *)&(node.dn.npredecessors),
                             0, FALSE, &thread_finished,
                             __kmp_task_stealing_constraint );
    }
}
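/* Illustrative only: this entry point is meant for constructs that must wait on dependences
   without deferring a task, e.g. an undeferred task such as

       #pragma omp task if(0) depend(inout: x)
       { ... }

   for which the compiler may emit __kmpc_omp_wait_deps before running the task body in place.
   The exact lowering depends on the compiler. */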