On Sun, Apr 03, 2011 at 08:10:25PM +0200, Jakub Jelinek wrote:
> On Sun, Apr 03, 2011 at 07:27:12PM +0900, Sho Nakatani wrote:
> > Then, I'll compare the trees created by gcc and icc, and point out
> > that the implementation of OpenMP Task uses Lazy Task Creation while
> > gcc does not.
> 
> Depends on what you mean by lazy task creation, gcc schedules
> tasks lazily if they aren't if (0), some data structure if created
> for them when encountering #pragma omp task directive, but I guess
> any implementation will do something like that.
> 
> What your testcase shows is not whether tasks are created lazily or not, but
> how good/poor #pragma omp taskwait implementation is.  And, for your testcase
> libgomp/task.c (GOMP_taskwait) definitely could be improved.  Currently it 
> only
> tries to schedule in children that will be awaited by the current tasks and if
> there are no such children, goes to sleep, waiting for them to complete.
> Scheduling in random unrelated tasks is problematic, because the unrelated
> task might take too long to complete and delay the taskwait for way too long
> (note, gcc doesn't have untied tasks, all tasks are tied once they are 
> scheduled
> onto some particular tasks - setcontext/swapcontext is quite fragile thing to 
> do).
> But it is true it could very well schedule tasks that are taskwaited by tasks
> taskwaited by current task, and transitively further.  Plus, be able to 
> temporarily
> awake such a sleeping thread if there are tasks it can transitively taskwait
> for, as if those don't complete, the current taskwait won't return.

Just FYI, I've tried to implement something like that as a quick hack,
but it unfortunately slowed things down, at least on the attached fib testcase
with arguments 40 25.  Guess partly the problem is that after a task waiting
in taskwait_sem is awaken it now needs to take task_lock lock to unqueue itself
from the new in_taskwait_list, and partly because the search for grand-grand 
children
etc. is more expensive, the FIFO isn't a good data structure for that.

        Jakub
--- libgomp/team.c.jj   2011-04-04 18:14:58.000000000 +0200
+++ libgomp/team.c      2011-04-04 20:00:45.000000000 +0200
@@ -166,6 +166,7 @@ gomp_new_team (unsigned nthreads)
 
   gomp_mutex_init (&team->task_lock);
   team->task_queue = NULL;
+  team->in_taskwait_list = NULL;
   team->task_count = 0;
   team->task_running_count = 0;
 
--- libgomp/libgomp.h.jj        2011-04-04 18:19:46.000000000 +0200
+++ libgomp/libgomp.h   2011-04-04 20:00:45.000000000 +0200
@@ -311,6 +311,7 @@ struct gomp_team
 
   gomp_mutex_t task_lock;
   struct gomp_task *task_queue;
+  struct gomp_task *in_taskwait_list;
   int task_count;
   int task_running_count;
 
--- libgomp/task.c.jj   2009-04-14 16:33:07.000000000 +0200
+++ libgomp/task.c      2011-04-04 20:02:18.000000000 +0200
@@ -176,6 +176,26 @@ GOMP_task (void (*fn) (void *), void *da
       gomp_team_barrier_set_task_pending (&team->barrier);
       do_wake = team->task_running_count + !parent->in_tied_task
                < team->nthreads;
+      if (!do_wake && team->in_taskwait_list)
+       {
+         struct gomp_task *t = team->in_taskwait_list;
+         do
+           {
+             struct gomp_task *p = parent;
+             int i;
+
+             for (i = 0; i < 10 && p; i++, p = p->parent)
+               if (p == t || p->kind == GOMP_TASK_IMPLICIT)
+                 break;
+             if (p == t)
+               {
+                 gomp_sem_post (&t->taskwait_sem);
+                 break;
+               }
+             t = t->next_queue;
+           }
+         while (t != team->in_taskwait_list);
+       }
       gomp_mutex_unlock (&team->task_lock);
       if (do_wake)
        gomp_team_barrier_wake (&team->barrier, 1);
@@ -301,10 +321,35 @@ GOMP_taskwait (void)
            }
          return;
        }
-      if (task->children->kind == GOMP_TASK_WAITING)
+      child_task = task->children;
+      if (child_task->kind != GOMP_TASK_WAITING && team->task_queue)
+       {
+         /* Try harder, look for grandchildren etc. */
+         for (child_task = team->task_queue;;
+              child_task = child_task->next_queue)
+           {
+             if (child_task->kind == GOMP_TASK_WAITING)
+               {
+                 struct gomp_task *p = child_task->parent;
+                 int i;
+
+                 for (i = 0; i < 10 && p; i++, p = p->parent)
+                   if (p == task || p->kind == GOMP_TASK_IMPLICIT)
+                     break;
+                 if (p == task)
+                   break;
+               }
+             if (child_task->next_queue == team->task_queue)
+               {
+                 child_task = task->children;
+                 break;
+               }
+           }
+       }
+      if (child_task->kind == GOMP_TASK_WAITING)
        {
-         child_task = task->children;
-         task->children = child_task->next_child;
+         if (child_task->parent->children == child_task)
+           child_task->parent->children = child_task->next_child;
          child_task->prev_queue->next_queue = child_task->next_queue;
          child_task->next_queue->prev_queue = child_task->prev_queue;
          if (team->task_queue == child_task)
@@ -320,9 +365,25 @@ GOMP_taskwait (void)
            gomp_team_barrier_clear_task_pending (&team->barrier);
        }
       else
-       /* All tasks we are waiting for are already running
-          in other threads.  Wait for them.  */
-       task->in_taskwait = true;
+       {
+         child_task = NULL;
+         /* All tasks we are waiting for are already running
+            in other threads.  Wait for them.  */
+         task->in_taskwait = true;
+         if (team->in_taskwait_list)
+           {
+             task->next_queue = team->in_taskwait_list;
+             task->prev_queue = team->in_taskwait_list->prev_queue;
+             task->next_queue->prev_queue = task;
+             task->prev_queue->next_queue = task;
+           }
+         else
+           {
+             task->next_queue = task;
+             task->prev_queue = task;
+             team->in_taskwait_list = task;
+           }
+       }
       gomp_mutex_unlock (&team->task_lock);
       if (to_free)
        {
@@ -337,22 +398,23 @@ GOMP_taskwait (void)
          thr->task = task;
        }
       else
-       {
-         gomp_sem_wait (&task->taskwait_sem);
-         task->in_taskwait = false;
-         return;
-       }
+       gomp_sem_wait (&task->taskwait_sem);
       gomp_mutex_lock (&team->task_lock);
       if (child_task)
        {
+         struct gomp_task *parent = child_task->parent;
          child_task->prev_child->next_child = child_task->next_child;
          child_task->next_child->prev_child = child_task->prev_child;
-         if (task->children == child_task)
+         if (parent->children == child_task)
            {
              if (child_task->next_child != child_task)
-               task->children = child_task->next_child;
+               parent->children = child_task->next_child;
              else
-               task->children = NULL;
+               {
+                 parent->children = NULL;
+                 if (parent->in_taskwait)
+                   gomp_sem_post (&parent->taskwait_sem);
+               }
            }
          gomp_clear_parent (child_task->children);
          to_free = child_task;
@@ -360,5 +422,14 @@ GOMP_taskwait (void)
          team->task_count--;
          team->task_running_count--;
        }
+      else
+       {
+         task->in_taskwait = false;
+         task->prev_queue->next_queue = task->next_queue;
+         task->next_queue->prev_queue = task->prev_queue;
+         if (team->in_taskwait_list == task)
+           team->in_taskwait_list
+             = task->next_queue == task ? NULL : task->next_queue;
+       }
     }
 }
#include <omp.h>
#include <stdio.h>

long
fib (long n, long l)
{
  long i, j;
  if (n < 2)
    return n;
  else if (l)
    {
#pragma omp task shared(i) firstprivate(n, l)
      i = fib (n - 1, l - 1);
#pragma omp task shared(j) firstprivate(n, l)
      j = fib (n - 2, l - 1);
#pragma omp taskwait
    }
  else
    {
      i = fib (n - 1, 0);
      j = fib (n - 2, 0);
    }
  return i + j;
}

int
main (int argc, char *argv[])
{
  long n = argv[1] ? atoi (argv[1]) : 50;
  long l = argv[1] && argv[2] ? atoi (argv[2]) : 10;
  double t1, t2;
  long result;

  omp_set_dynamic (0);

#pragma omp parallel shared(n)
#pragma omp single
  {
    t1 = omp_get_wtime ();
    result = fib (n, l);
    t2 = omp_get_wtime ();
    printf ("fib (%ld, %ld) %5f %ld\n", n, l, t2 - t1, result);
  }
  return 0;
}

Reply via email to