On Tue, Feb 18, 2014 at 8:58 PM, Richard Henderson <r...@redhat.com> wrote:
> On 02/16/2014 03:59 PM, Nathaniel Smith wrote:
>> Yes, but the problem is that depending on what the user intends to do
>> after forking, our pthread_atfork handler might help or it might hurt,
>> and we don't know which. Consider these two cases:
>>   - fork+exec
>>   - fork+continue to use OMP in child
>> The former case is totally POSIX-legal, even when performed at
>> arbitrary places, even when another thread is, say, in the middle of
>> calling malloc().
>
> Point well taken.

Hi all,

I guess this patch has gotten all the feedback that it's getting. Any
interest in committing it? :-) I don't have commit access.

2014-02-12  Nathaniel J. Smith  <n...@pobox.com>

        * team.c (gomp_free_pool_helper): Move per-thread cleanup to main
        thread.
        (gomp_free_thread): Delegate implementation to...
        (gomp_free_thread_pool): ...this new function. Like old
        gomp_free_thread, but does per-thread cleanup, and has option to
        skip everything that involves interacting with actual threads,
        which is useful when called after fork.
        (gomp_after_fork_callback): New function.
        (gomp_team_start): Register atfork handler, and check for fork on
        entry.

Cheers,
-n

-- 
Nathaniel J. Smith
Postdoctoral researcher - Informatics - University of Edinburgh
http://vorpus.org
Index: team.c
===================================================================
--- team.c      (revision 207398)
+++ team.c      (working copy)
@@ -28,6 +28,7 @@
 #include "libgomp.h"
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
 /* This attribute contains PTHREAD_CREATE_DETACHED.  */
 pthread_attr_t gomp_thread_attr;
@@ -43,6 +44,8 @@ __thread struct gomp_thread gomp_tls_data;
 pthread_key_t gomp_tls_key;
 #endif
 
+/* This is to enable best-effort cleanup after fork.  */
+static bool gomp_we_are_forked;
 
 /* This structure is used to communicate across pthread_create.  */
 
@@ -204,42 +207,41 @@ static struct gomp_thread_pool *gomp_new_thread_po
   return pool;
 }
 
+/* Free a thread pool and release its threads. */
+
 static void
 gomp_free_pool_helper (void *thread_pool)
 {
-  struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool
     = (struct gomp_thread_pool *) thread_pool;
   gomp_barrier_wait_last (&pool->threads_dock);
-  gomp_sem_destroy (&thr->release);
-  thr->thread_pool = NULL;
-  thr->task = NULL;
   pthread_exit (NULL);
 }
 
-/* Free a thread pool and release its threads. */
-
-void
-gomp_free_thread (void *arg __attribute__((unused)))
+static void
+gomp_free_thread_pool (bool threads_are_running)
 {
   struct gomp_thread *thr = gomp_thread ();
   struct gomp_thread_pool *pool = thr->thread_pool;
   if (pool)
     {
+      int i;
       if (pool->threads_used > 0)
        {
-         int i;
-         for (i = 1; i < pool->threads_used; i++)
+         if (threads_are_running)
            {
-             struct gomp_thread *nthr = pool->threads[i];
-             nthr->fn = gomp_free_pool_helper;
-             nthr->data = pool;
+             for (i = 1; i < pool->threads_used; i++)
+               {
+                 struct gomp_thread *nthr = pool->threads[i];
+                 nthr->fn = gomp_free_pool_helper;
+                 nthr->data = pool;
+               }
+             /* This barrier undocks threads docked on pool->threads_dock.  */
+             gomp_barrier_wait (&pool->threads_dock);
+             /* And this waits till all threads have called
+                gomp_barrier_wait_last in gomp_free_pool_helper.  */
+             gomp_barrier_wait (&pool->threads_dock);
            }
-         /* This barrier undocks threads docked on pool->threads_dock.  */
-         gomp_barrier_wait (&pool->threads_dock);
-         /* And this waits till all threads have called gomp_barrier_wait_last
-            in gomp_free_pool_helper.  */
-         gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);
 
@@ -251,6 +253,14 @@ gomp_free_pool_helper (void *thread_pool)
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
 #endif
+         /* Clean up thread objects */
+         for (i = 1; i < pool->threads_used; i++)
+           {
+             struct gomp_thread *nthr = pool->threads[i];
+             gomp_sem_destroy (&nthr->release);
+             nthr->thread_pool = NULL;
+             nthr->task = NULL;
+           }
        }
       free (pool->threads);
       if (pool->last_team)
@@ -266,6 +276,58 @@ gomp_free_pool_helper (void *thread_pool)
     }
 }
 
+/* This is called whenever a thread exits which has a non-NULL value for
+   gomp_thread_destructor. In practice, the only thread for which this occurs
+   is the one which created the thread pool.
+*/
+void
+gomp_free_thread (void *arg __attribute__((unused)))
+{
+  gomp_free_thread_pool (true);
+}
+
+/* This is called in the child process after a fork.
+
+   According to POSIX, if a process which uses threads calls fork(), then
+   there are very few things that the resulting child process can do safely --
+   mostly just exec().
+
+   However, in practice, (almost?) all POSIX implementations seem to allow
+   arbitrary code to run inside the child, *if* the parent process's threads
+   are in a well-defined state when the fork occurs. And this circumstance can
+   easily arise in OMP-using programs, e.g. when a library function like DGEMM
+   uses OMP internally, and some other unrelated part of the program calls
+   fork() at some other time, when no OMP sections are running.
+
+   Therefore, we make a best effort attempt to handle the case:
+
+     OMP section (in parent) -> quiesce -> fork -> OMP section (in child)
+
+   "Best-effort" here means that:
+   - Your system may or may not be able to handle this kind of code at all;
+     our goal is just to make sure that if it fails it's not gomp's fault.
+   - All threadprivate variables will be reset in the child. Fortunately this
+     is entirely compliant with the spec, according to the rule of nasal
+     demons.
+   - We must have minimal speed impact, and no correctness impact, on
+     compliant programs.
+
+   We use this callback to notice when a fork has a occurred, and if the child
+   later attempts to enter an OMP section (via gomp_team_start), then we know
+   that it is non-compliant, and are free to apply our best-effort strategy of
+   cleaning up the old thread pool structures and spawning a new one. Because
+   compliant programs never call gomp_team_start after forking, they are
+   unaffected.
+*/
+static void
+gomp_after_fork_callback (void)
+{
+  /* Only "async-signal-safe operations" are allowed here, so let's keep it
+     simple. No mutex is needed, because we are currently single-threaded.
+  */
+  gomp_we_are_forked = 1;
+}
+
 /* Launch a team.  */
 
 void
@@ -288,11 +350,19 @@ gomp_team_start (void (*fn) (void *), void *data,
 
   thr = gomp_thread ();
   nested = thr->ts.team != NULL;
+  if (__builtin_expect (gomp_we_are_forked, 0))
+    {
+      gomp_free_thread_pool (0);
+      gomp_we_are_forked = 0;
+    }
   if (__builtin_expect (thr->thread_pool == NULL, 0))
     {
       thr->thread_pool = gomp_new_thread_pool ();
       thr->thread_pool->threads_busy = nthreads;
+      /* The pool should be cleaned up whenever this thread exits... */
       pthread_setspecific (gomp_thread_destructor, thr);
+      /* ...and also in any fork()ed children. */
+      pthread_atfork (NULL, NULL, gomp_after_fork_callback);
     }
   pool = thr->thread_pool;
   task = thr->task;
Index: testsuite/libgomp.c/fork-1.c
===================================================================
--- testsuite/libgomp.c/fork-1.c        (revision 0)
+++ testsuite/libgomp.c/fork-1.c        (working copy)
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-timeout 10 } */
+
+#include <omp.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <assert.h>
+
+static int saw[4];
+
+static void
+check_parallel (int exit_on_failure)
+{
+  memset (saw, 0, sizeof (saw));
+  #pragma omp parallel num_threads (2)
+  {
+    int iam = omp_get_thread_num ();
+    saw[iam] = 1;
+  }
+
+  // Encode failure in status code to report to parent process
+  if (exit_on_failure)
+    {
+      if (saw[0] != 1)
+        _exit(1);
+      else if (saw[1] != 1)
+        _exit(2);
+      else if (saw[2] != 0)
+        _exit(3);
+      else if (saw[3] != 0)
+        _exit(4);
+      else
+        _exit(0);
+  }
+  // Use regular assertions
+  else
+    {
+      assert (saw[0] == 1);
+      assert (saw[1] == 1);
+      assert (saw[2] == 0);
+      assert (saw[3] == 0);
+    }
+}
+
+int
+main ()
+{
+  // Initialize the OMP thread pool in the parent process
+  check_parallel (0);
+  pid_t fork_pid = fork();
+  if (fork_pid == -1)
+    return 1;
+  else if (fork_pid == 0)
+    {
+      // Call OMP again in the child process and encode failures in exit
+      // code.
+      check_parallel (1);
+    }
+  else
+    {
+      // Check that OMP runtime is still functional in parent process after
+      // the fork.
+      check_parallel (0);
+
+      // Wait for the child to finish and check the exit code.
+      int child_status = 0;
+      pid_t wait_pid = wait(&child_status);
+      assert (wait_pid == fork_pid);
+      assert (WEXITSTATUS (child_status) == 0);
+
+      // Check that the termination of the child process did not impact
+      // OMP in parent process.
+      check_parallel (0);
+    }
+  return 0;
+}

Reply via email to