lib/util: enhanced tfork()
authorRalph Boehme <slow@samba.org>
Tue, 25 Apr 2017 22:48:39 +0000 (00:48 +0200)
committerRalph Boehme <slow@samba.org>
Mon, 3 Jul 2017 17:59:07 +0000 (19:59 +0200)
This function is a solution to the problem of fork() requiring special
preparations in the caller to handle SIGCHLD signals and to reap the
child by wait()ing for it.

Instead, tfork provides a pollable file descriptor. The caller gets the
file descriptor by calling tfork_event_fd() on the handle returned from
tfork_create() and the caller can then get the status of the child
with a call to tfork_status().

tfork avoids raising SIGCHLD signals in the caller by installing a
temporary SIGCHLD handler from inside tfork_create() and tfork_status().

The termination signal of other child processes not created with tfork()
is forwarded to the existing signal handler if any.

There's one thing this can't protect us against: a process installing a
SIGCHLD handler from one thread while another thread is running inside
tfork_create() or tfork_status(), where that signal handler doesn't
forward signals for exited children it didn't fork, i.e. our
children.

Pair-Programmed-With: Stefan Metzmacher <metze@samba.org>

Signed-off-by: Ralph Boehme <slow@samba.org>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
lib/util/tests/tfork.c
lib/util/tfork.c
lib/util/tfork.h

index bd5809d9be110ba49b3b2ddb3a1f124cf1e2582c..2f140dd6ef0dc735cffaff063233329b8313ae9b 100644 (file)
 
 static bool test_tfork_simple(struct torture_context *tctx)
 {
-       pid_t pid;
-       pid_t parent = getpid();
-       pid_t parent_arg;
-
-       pid = tfork(NULL, &parent_arg);
-       if (pid == 0) {
-               torture_comment(tctx, "my parent pid is %d\n", parent);
-               torture_assert(tctx, parent == parent_arg, "tfork failed\n");
-               _exit(0);
-       }
-       if (pid == -1) {
-               torture_fail(tctx, "tfork failed\n");
-               return false;
-       }
-
-       return true;
+        pid_t parent = getpid();
+        struct tfork *t = NULL;
+        pid_t child;
+        int ret;
+
+        t = tfork_create();
+        if (t == NULL) {
+                torture_fail(tctx, "tfork failed\n");
+                return false;
+        }
+        child = tfork_child_pid(t);
+        if (child == 0) {
+                torture_comment(tctx, "my parent pid is %d\n", parent);
+                torture_assert(tctx, getpid() != parent, "tfork failed\n");
+                _exit(0);
+        }
+
+        ret = tfork_destroy(&t);
+        torture_assert(tctx, ret == 0, "tfork_destroy failed\n");
+
+        return true;
 }
 
 static bool test_tfork_status(struct torture_context *tctx)
 {
-       pid_t child;
+       struct tfork *t = NULL;
        int status;
-       ssize_t nread;
-       int status_fd = -1;
+       pid_t child;
        bool ok = true;
 
-       child = tfork(&status_fd, NULL);
-       if (child == 0) {
-               _exit(123);
-       }
-       if (child == -1) {
+       t = tfork_create();
+       if (t == NULL) {
                torture_fail(tctx, "tfork failed\n");
                return false;
        }
+       child = tfork_child_pid(t);
+       if (child == 0) {
+               _exit(123);
+       }
 
-       nread = sys_read(status_fd, &status, sizeof(status));
-       if (nread != sizeof(status)) {
-               torture_fail(tctx, "sys_read failed\n");
+       status = tfork_status(&t, true);
+       if (status == -1) {
+               torture_fail(tctx, "tfork_status failed\n");
        }
 
        torture_assert_goto(tctx, WIFEXITED(status) == true, ok, done,
@@ -80,10 +85,6 @@ static bool test_tfork_status(struct torture_context *tctx)
        torture_comment(tctx, "exit status [%d]\n", WEXITSTATUS(status));
 
 done:
-       if (status_fd != -1) {
-               close(status_fd);
-       }
-
        return ok;
 }
 
index 37c00e614c2f218c15d340bf401a7db335bb261f..cc2e0a05f4e5b865c59387cd6aae6c2512335d83 100644 (file)
 #include "replace.h"
 #include "system/wait.h"
 #include "system/filesys.h"
+#include "system/network.h"
 #include "lib/util/samba_util.h"
 #include "lib/util/sys_rw.h"
 #include "lib/util/tfork.h"
 #include "lib/util/debug.h"
 
-struct tfork_state {
-       void (*old_sig_chld)(int);
-       int status_pipe[2];
-       pid_t *parent;
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#include <assert.h>
+
+/*
+ * This is what the process hierarchy looks like:
+ *
+ *   +----------+
+ *   |  caller  |
+ *   +----------+
+ *         |
+ *       fork
+ *         |
+ *         v
+ *   +----------+
+ *   |  waiter  |
+ *   +----------+
+ *         |
+ *       fork
+ *         |
+ *         v
+ *   +----------+
+ *   |  worker  |
+ *   +----------+
+ */
+
+/*
+ * The resulting (private) state per tfork_create() call, returned as an
+ * opaque handle to the caller.
+ */
+struct tfork {
+       /*
+        * This is returned to the caller with tfork_event_fd()
+        */
+       int event_fd;
 
-       pid_t level0_pid;
-       int level0_status;
+       /*
+        * This is used in the caller by tfork_status() to read the worker exit
+        * status and to tell the waiter to exit by closing the fd.
+        */
+       int status_fd;
 
-       pid_t level1_pid;
-       int level1_errno;
+       pid_t waiter_pid;
+       pid_t worker_pid;
+};
 
-       pid_t level2_pid;
-       int level2_errno;
+/*
+ * Internal per-thread state maintained while inside tfork.
+ */
+struct tfork_state {
+       pid_t waiter_pid;
+       int waiter_errno;
 
-       pid_t level3_pid;
+       pid_t worker_pid;
 };
 
 /*
- * TODO: We should make this global thread local
+ * A global state that synchronizes access to handling SIGCHLD and waiting for
+ * children.
  */
-static struct tfork_state *tfork_global;
+struct tfork_signal_state {
+       bool available;
 
-static void tfork_sig_chld(int signum)
+#ifdef HAVE_PTHREAD
+       pthread_cond_t cond;
+       pthread_mutex_t mutex;
+#endif
+
+       /*
+        * pid of the waiter child. This points at waiter_pid in either struct
+        * tfork or struct tfork_state, depending on who called
+        * tfork_install_sigchld_handler().
+        *
+        * When tfork_install_sigchld_handler() is called the waiter_pid is
+        * still -1 and only set later after fork(), that's why this must be
+        * a pointer. The signal handler checks this.
+        */
+       pid_t *pid;
+
+       struct sigaction oldact;
+       sigset_t oldset;
+};
+
+static struct tfork_signal_state signal_state;
+
+#ifdef HAVE_PTHREAD
+static pthread_once_t tfork_global_is_initialized = PTHREAD_ONCE_INIT;
+static pthread_key_t tfork_global_key;
+#else
+static struct tfork_state *global_state;
+#endif
+
+static void tfork_sigchld_handler(int signum, siginfo_t *si, void *p);
+
+#ifdef HAVE_PTHREAD
+static void tfork_global_destructor(void *state)
 {
-       if (tfork_global->level1_pid > 0) {
-               int ret = waitpid(tfork_global->level1_pid,
-                             &tfork_global->level0_status,
-                             WNOHANG);
-               if (ret == tfork_global->level1_pid) {
-                       tfork_global->level1_pid = -1;
-                       return;
+       anonymous_shared_free(state);
+}
+#endif
+
+static int tfork_acquire_sighandling(void)
+{
+       int ret = 0;
+
+#ifdef HAVE_PTHREAD
+       ret = pthread_mutex_lock(&signal_state.mutex);
+       if (ret != 0) {
+               return ret;
+       }
+
+       while (!signal_state.available) {
+               ret = pthread_cond_wait(&signal_state.cond,
+                                       &signal_state.mutex);
+               if (ret != 0) {
+                       return ret;
                }
        }
 
-       /*
-        * Not our child, forward to old handler
-        */
+       signal_state.available = false;
 
-       if (tfork_global->old_sig_chld == SIG_IGN) {
-               return;
+       ret = pthread_mutex_unlock(&signal_state.mutex);
+       if (ret != 0) {
+               return ret;
        }
+#endif
 
-       if (tfork_global->old_sig_chld == SIG_DFL) {
-               return;
+       return ret;
+}
+
+static int tfork_release_sighandling(void)
+{
+       int ret = 0;
+
+#ifdef HAVE_PTHREAD
+       ret = pthread_mutex_lock(&signal_state.mutex);
+       if (ret != 0) {
+               return ret;
+       }
+
+       signal_state.available = true;
+
+       ret = pthread_cond_signal(&signal_state.cond);
+       if (ret != 0) {
+               pthread_mutex_unlock(&signal_state.mutex);
+               return ret;
+       }
+
+       ret = pthread_mutex_unlock(&signal_state.mutex);
+       if (ret != 0) {
+               return ret;
        }
+#endif
 
-       tfork_global->old_sig_chld(signum);
+       return ret;
 }
 
-static pid_t level2_fork_and_wait(int child_ready_fd)
+#ifdef HAVE_PTHREAD
+static void tfork_atfork_prepare(void)
 {
-       int status;
-       ssize_t written;
-       pid_t pid;
-       int fd;
-       bool wait;
+       int ret;
+
+       ret = pthread_mutex_lock(&signal_state.mutex);
+       assert(ret == 0);
+}
+
+static void tfork_atfork_parent(void)
+{
+       int ret;
+
+       ret = pthread_mutex_unlock(&signal_state.mutex);
+       assert(ret == 0);
+}
+#endif
+
+static void tfork_atfork_child(void)
+{
+       int ret;
+
+#ifdef HAVE_PTHREAD
+       ret = pthread_mutex_unlock(&signal_state.mutex);
+       assert(ret == 0);
+
+       ret = pthread_key_delete(tfork_global_key);
+       assert(ret == 0);
+
+       ret = pthread_key_create(&tfork_global_key, tfork_global_destructor);
+       assert(ret == 0);
 
        /*
-        * Child level 2.
-        *
-        * Do a final fork and if the tfork() caller passed a status_fd, wait
-        * for child3 and return its exit status via status_fd.
+        * There's no way to destroy a condition variable if there are waiters,
+        * pthread_cond_destroy() will return EBUSY. Just zero out memory and
+        * then initialize again. This is not backed by POSIX but should be ok.
         */
+       ZERO_STRUCT(signal_state.cond);
+       ret = pthread_cond_init(&signal_state.cond, NULL);
+       assert(ret == 0);
+#endif
 
-       pid = fork();
-       if (pid == 0) {
-               /*
-                * Child level 3, this one finally returns from tfork() as child
-                * with pid 0.
-                *
-                * Cleanup all ressources we allocated before returning.
-                */
-               close(child_ready_fd);
-               close(tfork_global->status_pipe[1]);
-
-               if (tfork_global->parent != NULL) {
-                       /*
-                        * we're in the child and return the level0 parent pid
-                        */
-                       *tfork_global->parent = tfork_global->level0_pid;
-               }
+       if (signal_state.pid != NULL) {
 
-               anonymous_shared_free(tfork_global);
-               tfork_global = NULL;
+               ret = sigaction(SIGCHLD, &signal_state.oldact, NULL);
+               assert(ret == 0);
 
-               return 0;
+#ifdef HAVE_PTHREAD
+               ret = pthread_sigmask(SIG_SETMASK, &signal_state.oldset, NULL);
+#else
+               ret = sigprocmask(SIG_SETMASK, &signal_state.oldset, NULL);
+               assert(ret == 0);
+#endif
+
+               signal_state.pid = NULL;
        }
 
-       tfork_global->level3_pid = pid;
-       if (tfork_global->level3_pid == -1) {
-               tfork_global->level2_errno = errno;
-               _exit(0);
+       signal_state.available = true;
+}
+
+static void tfork_global_initialize(void)
+{
+#ifdef HAVE_PTHREAD
+       int ret;
+
+       pthread_atfork(tfork_atfork_prepare,
+                      tfork_atfork_parent,
+                      tfork_atfork_child);
+
+       ret = pthread_key_create(&tfork_global_key, tfork_global_destructor);
+       assert(ret == 0);
+
+       ret = pthread_mutex_init(&signal_state.mutex, NULL);
+       assert(ret == 0);
+
+       ret = pthread_cond_init(&signal_state.cond, NULL);
+       assert(ret == 0);
+#endif
+
+       signal_state.available = true;
+}
+
+static struct tfork_state *tfork_global_get(void)
+{
+       struct tfork_state *state = NULL;
+#ifdef HAVE_PTHREAD
+       int ret;
+#endif
+
+#ifdef HAVE_PTHREAD
+       state = (struct tfork_state *)pthread_getspecific(tfork_global_key);
+#else
+       state = global_state;
+#endif
+       if (state != NULL) {
+               return state;
        }
 
-       sys_write(child_ready_fd, &(char){0}, 1);
+       state = (struct tfork_state *)anonymous_shared_allocate(
+               sizeof(struct tfork_state));
+       if (state == NULL) {
+               return NULL;
+       }
 
-       if (tfork_global->status_pipe[1] == -1) {
-               _exit(0);
+#ifdef HAVE_PTHREAD
+       ret = pthread_setspecific(tfork_global_key, state);
+       if (ret != 0) {
+               anonymous_shared_free(state);
+               return NULL;
        }
-       wait = true;
+#endif
+       return state;
+}
 
-       /*
-        * We're going to stay around until child3 exits, so lets close all fds
-        * other then the pipe fd we may have inherited from the caller.
-        */
-       while (true) {
-               fd = dup2(tfork_global->status_pipe[1], 0);
-               if (fd == -1) {
-                       if (errno == EINTR) {
-                               continue;
-                       }
-                       status = errno;
+static void tfork_global_free(void)
+{
+       struct tfork_state *state = NULL;
+#ifdef HAVE_PTHREAD
+       int ret;
+#endif
+
+#ifdef HAVE_PTHREAD
+       state = (struct tfork_state *)pthread_getspecific(tfork_global_key);
+#else
+       state = global_state;
+#endif
+       if (state == NULL) {
+               return;
+       }
 
-                       kill(tfork_global->level3_pid, SIGKILL);
+#ifdef HAVE_PTHREAD
+       ret = pthread_setspecific(tfork_global_key, NULL);
+       if (ret != 0) {
+               return;
+       }
+#endif
+       anonymous_shared_free(state);
+}
 
-                       written = sys_write(tfork_global->status_pipe[1],
-                                           &status, sizeof(status));
-                       if (written != sizeof(status)) {
-                               abort();
-                       }
-                       _exit(0);
-               }
-               break;
+/**
+ * Only one thread at a time is allowed to handle SIGCHLD signals
+ **/
+static int tfork_install_sigchld_handler(pid_t *pid)
+{
+       int ret;
+       struct sigaction act;
+       sigset_t set;
+
+       ret = tfork_acquire_sighandling();
+       if (ret != 0) {
+               return -1;
        }
-       closefrom(1);
 
-       while (wait) {
-               int ret = waitpid(tfork_global->level3_pid, &status, 0);
-               if (ret == -1) {
-                       if (errno == EINTR) {
-                               continue;
-                       }
-                       status = errno;
-               }
-               break;
+       assert(signal_state.pid == NULL);
+       signal_state.pid = pid;
+
+       act = (struct sigaction) {
+               .sa_sigaction = tfork_sigchld_handler,
+               .sa_flags = SA_SIGINFO,
+       };
+
+       ret = sigaction(SIGCHLD, &act, &signal_state.oldact);
+       if (ret != 0) {
+               return -1;
        }
 
-       written = sys_write(fd, &status, sizeof(status));
-       if (written != sizeof(status)) {
-               abort();
+       sigemptyset(&set);
+       sigaddset(&set, SIGCHLD);
+#ifdef HAVE_PTHREAD
+       ret = pthread_sigmask(SIG_UNBLOCK, &set, &signal_state.oldset);
+#else
+       ret = sigprocmask(SIG_UNBLOCK, &set, &signal_state.oldset);
+#endif
+       if (ret != 0) {
+               return -1;
        }
 
-       _exit(0);
+       return 0;
 }
 
-pid_t tfork(int *status_fd, pid_t *parent)
+static int tfork_uninstall_sigchld_handler(void)
 {
        int ret;
+
+       signal_state.pid = NULL;
+
+       ret = sigaction(SIGCHLD, &signal_state.oldact, NULL);
+       if (ret != 0) {
+               return -1;
+       }
+
+#ifdef HAVE_PTHREAD
+       ret = pthread_sigmask(SIG_SETMASK, &signal_state.oldset, NULL);
+#else
+       ret = sigprocmask(SIG_SETMASK, &signal_state.oldset, NULL);
+#endif
+       if (ret != 0) {
+               return -1;
+       }
+
+       ret = tfork_release_sighandling();
+       if (ret != 0) {
+               return -1;
+       }
+
+       return 0;
+}
+
+static void tfork_sigchld_handler(int signum, siginfo_t *si, void *p)
+{
+       if ((signal_state.pid != NULL) &&
+           (*signal_state.pid != -1) &&
+           (si->si_pid == *signal_state.pid))
+       {
+               return;
+       }
+
+       /*
+        * Not our child, forward to old handler
+        */
+       if (signal_state.oldact.sa_flags & SA_SIGINFO) {
+               signal_state.oldact.sa_sigaction(signum, si, p);
+               return;
+       }
+
+       if (signal_state.oldact.sa_handler == SIG_IGN) {
+               return;
+       }
+       if (signal_state.oldact.sa_handler == SIG_DFL) {
+               return;
+       }
+       signal_state.oldact.sa_handler(signum);
+}
+
+static pid_t tfork_start_waiter_and_worker(struct tfork_state *state,
+                                          int *_event_fd,
+                                          int *_status_fd)
+{
+       int p[2];
+       int status_sp_caller_fd = -1;
+       int status_sp_waiter_fd = -1;
+       int event_pipe_caller_fd = -1;
+       int event_pipe_waiter_fd = -1;
+       int ready_pipe_caller_fd = -1;
+       int ready_pipe_worker_fd = -1;
+       ssize_t nwritten;
+       ssize_t nread;
        pid_t pid;
-       pid_t child;
+       int status;
+       int fd;
+       char c;
+       int ret;
+
+       *_event_fd = -1;
+       *_status_fd = -1;
 
-       tfork_global = (struct tfork_state *)
-               anonymous_shared_allocate(sizeof(struct tfork_state));
-       if (tfork_global == NULL) {
+       if (state == NULL) {
                return -1;
        }
 
-       tfork_global->parent = parent;
-       tfork_global->status_pipe[0] = -1;
-       tfork_global->status_pipe[1] = -1;
+       ret = socketpair(AF_UNIX, SOCK_STREAM, 0, p);
+       if (ret != 0) {
+               return -1;
+       }
+       set_close_on_exec(p[0]);
+       set_close_on_exec(p[1]);
+       status_sp_caller_fd = p[0];
+       status_sp_waiter_fd = p[1];
+
+       ret = pipe(p);
+       if (ret != 0) {
+               close(status_sp_caller_fd);
+               close(status_sp_waiter_fd);
+               return -1;
+       }
+       set_close_on_exec(p[0]);
+       set_close_on_exec(p[1]);
+       event_pipe_caller_fd = p[0];
+       event_pipe_waiter_fd = p[1];
+
+
+       ret = pipe(p);
+       if (ret != 0) {
+               close(status_sp_caller_fd);
+               close(status_sp_waiter_fd);
+               close(event_pipe_caller_fd);
+               close(event_pipe_waiter_fd);
+               return -1;
+       }
+       set_close_on_exec(p[0]);
+       set_close_on_exec(p[1]);
+       ready_pipe_worker_fd = p[0];
+       ready_pipe_caller_fd = p[1];
 
-       tfork_global->level0_pid = getpid();
-       tfork_global->level0_status = -1;
-       tfork_global->level1_pid = -1;
-       tfork_global->level1_errno = ECANCELED;
-       tfork_global->level2_pid = -1;
-       tfork_global->level2_errno = ECANCELED;
-       tfork_global->level3_pid = -1;
+       pid = fork();
+       if (pid == -1) {
+               close(status_sp_caller_fd);
+               close(status_sp_waiter_fd);
+               close(event_pipe_caller_fd);
+               close(event_pipe_waiter_fd);
+               close(ready_pipe_caller_fd);
+               close(ready_pipe_worker_fd);
+               return -1;
+       }
+       if (pid != 0) {
+               /* The caller */
 
-       if (status_fd != NULL) {
-               ret = pipe(&tfork_global->status_pipe[0]);
-               if (ret != 0) {
-                       int saved_errno = errno;
+               state->waiter_pid = pid;
+
+               close(status_sp_waiter_fd);
+               close(event_pipe_waiter_fd);
+               close(ready_pipe_worker_fd);
+
+               set_blocking(event_pipe_caller_fd, false);
 
-                       anonymous_shared_free(tfork_global);
-                       tfork_global = NULL;
-                       errno = saved_errno;
+               /*
+                * wait for the waiter to get ready.
+                */
+               nread = sys_read(status_sp_caller_fd, &c, sizeof(char));
+               if (nread != sizeof(char)) {
                        return -1;
                }
 
-               *status_fd = tfork_global->status_pipe[0];
+               /*
+                * Notify the worker to start.
+                */
+               nwritten = sys_write(ready_pipe_caller_fd,
+                                    &(char){0}, sizeof(char));
+               if (nwritten != sizeof(char)) {
+                       close(ready_pipe_caller_fd);
+                       return -1;
+               }
+               close(ready_pipe_caller_fd);
+
+               *_event_fd = event_pipe_caller_fd;
+               *_status_fd = status_sp_caller_fd;
+
+               return pid;
        }
 
+#ifndef HAVE_PTHREAD
+       /* cleanup sigchld_handler */
+       tfork_atfork_child();
+#endif
+
        /*
-        * We need to set our own signal handler to prevent any existing signal
-        * handler from reaping our child.
+        * The "waiter" child.
         */
-       tfork_global->old_sig_chld = CatchSignal(SIGCHLD, tfork_sig_chld);
+       CatchSignal(SIGCHLD, SIG_DFL);
+
+       close(status_sp_caller_fd);
+       close(event_pipe_caller_fd);
+       close(ready_pipe_caller_fd);
 
        pid = fork();
+       if (pid == -1) {
+               state->waiter_errno = errno;
+               _exit(0);
+       }
        if (pid == 0) {
-               int level2_pipe[2];
-               char c;
-               ssize_t nread;
-
                /*
-                * Child level 1.
-                *
-                * Restore SIGCHLD handler
+                * The worker child.
                 */
-               CatchSignal(SIGCHLD, SIG_DFL);
 
-               /*
-                * Close read end of the signal pipe, we don't need it anymore
-                * and don't want to leak it into childs.
-                */
-               if (tfork_global->status_pipe[0] != -1) {
-                       close(tfork_global->status_pipe[0]);
-                       tfork_global->status_pipe[0] = -1;
-               }
+               close(status_sp_waiter_fd);
+               close(event_pipe_waiter_fd);
 
                /*
-                * Create a pipe for waiting for the child level 2 to finish
-                * forking.
+                * Wait for the caller to give us a go!
                 */
-               ret = pipe(&level2_pipe[0]);
-               if (ret != 0) {
-                       tfork_global->level1_errno = errno;
-                       _exit(0);
+               nread = sys_read(ready_pipe_worker_fd, &c, sizeof(char));
+               if (nread != sizeof(char)) {
+                       _exit(1);
                }
+               close(ready_pipe_worker_fd);
 
-               pid = fork();
-               if (pid == 0) {
+               return 0;
+       }
+       state->worker_pid = pid;
+
+       close(ready_pipe_worker_fd);
+
+       /*
+        * We're going to stay around until the worker exits, so let's close all
+        * fds other than the pipe fd we may have inherited from the caller.
+        *
+        * Dup event_pipe_waiter_fd and status_sp_waiter_fd onto fds 0 and 1 so
+        * we can then call closefrom(2).
+        */
+       if (event_pipe_waiter_fd > 0) {
+               int dup_fd = 0;
 
-                       /*
-                        * Child level 2.
-                        */
+               if (status_sp_waiter_fd == 0) {
+                       dup_fd = 1;
+               }
 
-                       close(level2_pipe[0]);
-                       return level2_fork_and_wait(level2_pipe[1]);
+               do {
+                       fd = dup2(event_pipe_waiter_fd, dup_fd);
+               } while ((fd == -1) && (errno == EINTR));
+               if (fd == -1) {
+                       state->waiter_errno = errno;
+                       kill(state->worker_pid, SIGKILL);
+                       state->worker_pid = -1;
+                       _exit(1);
                }
+               event_pipe_waiter_fd = fd;
+       }
 
-               tfork_global->level2_pid = pid;
-               if (tfork_global->level2_pid == -1) {
-                       tfork_global->level1_errno = errno;
-                       _exit(0);
+       if (status_sp_waiter_fd > 1) {
+               do {
+                       fd = dup2(status_sp_waiter_fd, 1);
+               } while ((fd == -1) && (errno == EINTR));
+               if (fd == -1) {
+                       state->waiter_errno = errno;
+                       kill(state->worker_pid, SIGKILL);
+                       state->worker_pid = -1;
+                       _exit(1);
                }
+               status_sp_waiter_fd = fd;
+       }
 
-               close(level2_pipe[1]);
-               level2_pipe[1] = -1;
+       closefrom(2);
 
-               nread = sys_read(level2_pipe[0], &c, 1);
-               if (nread != 1) {
-                       abort();
+       /* Tell the caller we're ready */
+       nwritten = sys_write(status_sp_waiter_fd, &(char){0}, sizeof(char));
+       if (nwritten != sizeof(char)) {
+               _exit(1);
+       }
+
+       tfork_global_free();
+       state = NULL;
+
+       do {
+               ret = waitpid(pid, &status, 0);
+       } while ((ret == -1) && (errno == EINTR));
+       if (ret == -1) {
+               status = errno;
+               kill(pid, SIGKILL);
+       }
+
+       /*
+        * This writes the worker child exit status via our internal socketpair
+        * so the tfork_status() implementation can read it from its end.
+        */
+       nwritten = sys_write(status_sp_waiter_fd, &status, sizeof(status));
+       if (nwritten == -1) {
+               if (errno != EPIPE && errno != ECONNRESET) {
+                       _exit(errno);
                }
+               /*
+                * The caller exited and didn't call tfork_status().
+                */
                _exit(0);
        }
+       if (nwritten != sizeof(status)) {
+               _exit(1);
+       }
 
-       tfork_global->level1_pid = pid;
-       if (tfork_global->level1_pid == -1) {
-               int saved_errno = errno;
-
-               anonymous_shared_free(tfork_global);
-               tfork_global = NULL;
-               errno = saved_errno;
-               return -1;
+       /*
+        * This writes to the event_fd returned by tfork_event_fd() and notifies
+        * the caller that the worker child is done and that it may now call
+        * tfork_status().
+        */
+       nwritten = sys_write(event_pipe_waiter_fd, &(char){0}, sizeof(char));
+       if (nwritten != sizeof(char)) {
+               _exit(1);
        }
 
        /*
-        * By using the helper variable pid we avoid a TOCTOU with the signal
-        * handler that will set tfork_global->level1_pid to -1 (which would
-        * cause waitpid() to block waiting for another exitted child).
+        * Wait for our parent (the process that called tfork_create()) to
+        * close() the socketpair fd in tfork_status().
         *
-        * We can't avoid the race waiting for pid twice (in the signal handler
-        * and then again here in the while loop), but we must avoid waiting for
-        * -1 and this does the trick.
+        * Again, the caller might have exited without calling tfork_status().
         */
-       pid = tfork_global->level1_pid;
-
-       while (tfork_global->level1_pid != -1) {
-               ret = waitpid(pid, &tfork_global->level0_status, 0);
-               if (ret == -1 && errno == EINTR) {
-                       continue;
+       nread = sys_read(status_sp_waiter_fd, &c, 1);
+       if (nread == -1) {
+               if (errno == EPIPE || errno == ECONNRESET) {
+                       _exit(0);
                }
+               _exit(errno);
+       }
+       if (nread != 0) {
+               _exit(255);
+       }
+
+       _exit(0);
+}
 
-               break;
+static int tfork_create_reap_waiter(pid_t waiter_pid)
+{
+       pid_t pid;
+       int waiter_status;
+
+       if (waiter_pid == -1) {
+               return 0;
        }
 
-       CatchSignal(SIGCHLD, tfork_global->old_sig_chld);
+       kill(waiter_pid, SIGKILL);
 
-       if (tfork_global->level0_status != 0) {
-               anonymous_shared_free(tfork_global);
-               tfork_global = NULL;
-               errno = ECHILD;
-               return -1;
+       do {
+               pid = waitpid(waiter_pid, &waiter_status, 0);
+       } while ((pid == -1) && (errno == EINTR));
+       assert(pid == waiter_pid);
+
+       return 0;
+}
+
+struct tfork *tfork_create(void)
+{
+       struct tfork_state *state = NULL;
+       struct tfork *t = NULL;
+       pid_t pid;
+       int saved_errno;
+       int ret = 0;
+
+#ifdef HAVE_PTHREAD
+       ret = pthread_once(&tfork_global_is_initialized,
+                          tfork_global_initialize);
+       if (ret != 0) {
+               return NULL;
+       }
+#else
+       tfork_global_initialize();
+#endif
+
+       state = tfork_global_get();
+       if (state == NULL) {
+               return NULL;
+       }
+       *state = (struct tfork_state) {
+               .waiter_pid = -1,
+               .waiter_errno = ECANCELED,
+               .worker_pid = -1,
+       };
+
+       t = malloc(sizeof(struct tfork));
+       if (t == NULL) {
+               ret = -1;
+               goto cleanup;
        }
 
-       if (tfork_global->level2_pid == -1) {
-               int saved_errno = tfork_global->level1_errno;
+       *t = (struct tfork) {
+               .event_fd = -1,
+               .status_fd = -1,
+               .waiter_pid = -1,
+               .worker_pid = -1,
+       };
+
+       ret = tfork_install_sigchld_handler(&state->waiter_pid);
+       if (ret != 0) {
+               goto cleanup;
+       }
+
+       pid = tfork_start_waiter_and_worker(state,
+                                           &t->event_fd,
+                                           &t->status_fd);
+       if (pid == -1) {
+               ret = -1;
+               goto cleanup;
+       }
+       if (pid == 0) {
+               /* In the worker */
+               tfork_global_free();
+               t->worker_pid = 0;
+               return t;
+       }
+
+       t->waiter_pid = pid;
+       t->worker_pid = state->worker_pid;
+
+cleanup:
+       if (ret == -1) {
+               saved_errno = errno;
+
+               if (t != NULL) {
+                       if (t->status_fd != -1) {
+                               close(t->status_fd);
+                       }
+                       if (t->event_fd != -1) {
+                               close(t->event_fd);
+                       }
+
+                       ret = tfork_create_reap_waiter(state->waiter_pid);
+                       assert(ret == 0);
+
+                       free(t);
+                       t = NULL;
+               }
+       }
+
+       ret = tfork_uninstall_sigchld_handler();
+       assert(ret == 0);
+
+       tfork_global_free();
 
-               anonymous_shared_free(tfork_global);
-               tfork_global = NULL;
+       if (ret == -1) {
                errno = saved_errno;
+       }
+       return t;
+}
+
+pid_t tfork_child_pid(const struct tfork *t) /* accessor for the worker pid stored in the handle */
+{
+       return t->worker_pid; /* tfork_create() stores 0 here when running in the worker; t is not NULL-checked */
+}
+
+int tfork_event_fd(const struct tfork *t) /* accessor for the event fd stored in the handle */
+{
+       return t->event_fd; /* t is dereferenced without a NULL check */
+}
+
+int tfork_status(struct tfork **_t, bool wait) /* read the worker's status, reap the waiter, free *_t */
+{
+       struct tfork *t = *_t;
+       int status;
+       ssize_t nread;
+       int waiter_status;
+       pid_t pid;
+       int ret;
+
+       if (t == NULL) {
                return -1;
        }
 
-       if (tfork_global->level3_pid == -1) {
-               int saved_errno = tfork_global->level2_errno;
+       if (wait) {
+               set_blocking(t->status_fd, true); /* blocking mode: the read below waits for the status */
 
-               anonymous_shared_free(tfork_global);
-               tfork_global = NULL;
-               errno = saved_errno;
+               nread = sys_read(t->status_fd, &status, sizeof(int)); /* NOTE(review): sys_read is presumably an EINTR-retrying read wrapper - confirm */
+       } else {
+               set_blocking(t->status_fd, false); /* non-blocking mode: the read below must not wait */
+
+               nread = read(t->status_fd, &status, sizeof(int));
+               if ((nread == -1) &&
+                   ((errno == EAGAIN) || (errno == EWOULDBLOCK) || errno == EINTR)) { /* no status available yet; EINTR is treated the same way */
+                       errno = EAGAIN; /* normalize to the single errno value callers have to check */
+                       return -1;
+               }
+       }
+       if (nread != sizeof(int)) { /* anything but one complete int (error, EOF, short read) is a failure */
                return -1;
        }
 
-       child = tfork_global->level3_pid;
-       anonymous_shared_free(tfork_global);
-       tfork_global = NULL;
+       ret = tfork_install_sigchld_handler(&t->waiter_pid); /* temporary SIGCHLD handler while the waiter is reaped */
+       if (ret != 0) {
+               return -1; /* NOTE(review): the status was already read from the pipe and is dropped here; t is not freed */
+       }
+
+       /*
+        * This triggers process exit in the waiter.
+        */
+       close(t->status_fd);
+
+       do {
+               pid = waitpid(t->waiter_pid, &waiter_status, 0); /* reap the waiter; its own status is not used further */
+       } while ((pid == -1) && (errno == EINTR)); /* retry when interrupted by a signal */
+       assert(pid == t->waiter_pid);
+
+       close(t->event_fd);
+
+       free(t);
+       t = NULL;
+       *_t = NULL; /* the handle is gone: clear the caller's pointer as well */
+
+       ret = tfork_uninstall_sigchld_handler();
+       assert(ret == 0);
+
+       return status; /* the int that was read from status_fd */
+}
+
+/*
+ * Kill the worker with SIGKILL, then collect and discard its status via
+ * tfork_status(), which frees *_t and sets it to NULL on success.
+ *
+ * Returns 0 on success and -1 on error. errno is set to EINVAL when *_t is
+ * NULL or when the handle does not hold the pid of a real child process.
+ */
+int tfork_destroy(struct tfork **_t)
+{
+        struct tfork *t = *_t;
+        int ret;
+
+        if (t == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /*
+         * kill() treats pid 0 as "the caller's process group" and pid -1 as
+         * "every process we are allowed to signal". worker_pid is 0 inside
+         * the worker and -1 until tfork_create() has filled it in, so those
+         * values must never be handed to kill().
+         */
+        if (t->worker_pid <= 0) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /* The result is deliberately ignored: killing is best effort. */
+        kill(t->worker_pid, SIGKILL);
+
+        ret = tfork_status(_t, true);
+        if (ret == -1) {
+                return -1;
+        }
 
-       return child;
+        return 0;
 }
index 0c62fc35387d8041deca9ad25472359c0c5d27de..1fea2ba41295e03f16b19f9d54397575d7a29bf5 100644 (file)
 #ifndef LIB_UTIL_TFORK_H
 #define LIB_UTIL_TFORK_H
 
+struct tfork;
+
 /**
  * @brief a fork() that avoids SIGCHLD and waitpid
  *
- * This function is a workaround for the problem of using fork() in
- * library code. In that case the library should avoid to set a global
- * signal handler for SIGCHLD, because the application may wants to use its
- * own handler.
- *
- * The child process will start with SIGCHLD handler set to SIG_DFL, so the
- * child might need to setup its own handler.
- *
- * @param[out] status_fd  If this is not NULL, tfork creates a pipe and returns
- *                        the readable end via this pointer. The caller can
- *                        wait for the process to finish by polling the
- *                        status_fd for readability and can then read the exit
- *                        status (an int).
- *
- * @param[out] parent     The PID of the parent process, if 0 is returned
- *                        otherwise the variable will not be touched at all.
- *                        It is possible to pass NULL.
- *
- * @return                On success, the PID of the child process is returned
- *                        in the parent, and 0 is returned in the child. On
- *                        failure, -1 is returned in the parent, no child
- *                        process is created, and errno is set appropriately.
- */
-int tfork(int *status_fd, int *parent);
+ * This function is a solution to the problem of fork() requiring special
+ * preparations in the caller to handle SIGCHLD signals and to reap the child by
+ * wait()ing for it.
+ *
+ * The advantage over fork() is that the child process termination is signalled
+ * to the caller by making the pipe fd returned by tfork_event_fd() readable, in
+ * which case the exit status of the child can be fetched with tfork_status()
+ * without blocking.
+ *
+ * The child process will start with SIGCHLD handler set to SIG_DFL.
+ *
+ * @return                On success, a struct tfork. NULL on failure.
+ *                        Use tfork_child_pid() to get the pid of the created
+ *                        child and tfork_event_fd() to get the file descriptor
+ *                        that can be used to poll for process termination and
+ *                        reading the child process exit status.
+ *
+ * @note There's one thing this can't protect us against: a process that
+ * installs a SIGCHLD handler from one thread while another thread is
+ * running inside tfork_create() or tfork_status(), where that signal handler
+ * doesn't forward signals for exited children it didn't fork, i.e. our children.
+ **/
+struct tfork *tfork_create(void);
+
+/**
+ * @brief Return the child pid from tfork_create()
+ *
+ * @param[in]   t    Pointer to struct tfork returned by tfork_create()
+ *
+ * @return           In the caller this returns the pid of the child,
+ *                   in the child this returns 0.
+ **/
+pid_t tfork_child_pid(const struct tfork *t);
+
+/**
+ * @brief Return an event fd that signals child termination
+ *
+ * @param[in]   t    Pointer to struct tfork returned by tfork_create()
+ *
+ * @return           An fd that becomes readable when the child created with
+ *                   tfork_create() terminates. It is guaranteed that a
+ *                   subsequent call to tfork_status() will not block and return
+ *                   the exit status of the child.
+ **/
+int tfork_event_fd(const struct tfork *t);
+
+/**
+ * @brief Wait for the child to terminate and return its exit status
+ *
+ * @param[in,out] _t  Pointer-pointer to a struct tfork returned by
+ *                    tfork_create(). Upon successful completion *_t is freed
+ *                    and set to NULL.
+ *
+ * @param[in]   wait  Whether to wait for the child to change state. If wait is
+ *                    false, and the child hasn't changed state, tfork_status()
+ *                    will return -1 with errno set to EAGAIN. If wait is true,
+ *                    tfork_status() will block waiting for the child to change
+ *                    runstate.
+ *
+ * @return            The exit status of the child, -1 on error.
+ *
+ * @note We overload the return value a bit, but a process exit status is pretty
+ * much guaranteed to be a 16-bit int and can't be -1.
+ **/
+int tfork_status(struct tfork **_t, bool wait);
+
+/**
+ * @brief Terminate the child discarding the exit status
+ *
+ * @param[in,out] _t  Pointer-pointer to a struct tfork returned by
+ *                    tfork_create(). Upon successful completion *_t is freed
+ *                    and set to NULL.
+ *
+ * @return            0 on success, -1 on error.
+ **/
+int tfork_destroy(struct tfork **_t);
 
 #endif /* LIB_UTIL_TFORK_H */