ctdb-daemon: Wait for eventd to be ready before connecting
author Martin Schwenke <martin@meltin.net>
Mon, 27 Aug 2018 04:47:38 +0000 (14:47 +1000)
committer Amitay Isaacs <amitay@samba.org>
Thu, 30 Aug 2018 02:48:56 +0000 (04:48 +0200)
The current method of retrying the connection to eventd means that
messages get logged for each failure.

Instead, pass a pipe file descriptor to eventd and wait for it to
write 0 to the pipe to indicate that it is ready to accept client
connections.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=13592

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/server/eventscript.c

index cf4f7f6860bbb20253fedb2344f4f837274f7020..30916a07ed5630f248e339272fb9a1b8c31d0bf1 100644 (file)
@@ -110,6 +110,100 @@ static bool eventd_context_init(TALLOC_CTX *mem_ctx,
        return true;
 }
 
+struct eventd_startup_state {
+       bool done;
+       int ret;
+       int fd;
+};
+
+static void eventd_startup_timeout_handler(struct tevent_context *ev,
+                                          struct tevent_timer *te,
+                                          struct timeval t,
+                                          void *private_data)
+{
+       struct eventd_startup_state *state =
+               (struct eventd_startup_state *) private_data;
+
+       state->done = true;
+       state->ret = ETIMEDOUT;
+}
+
+static void eventd_startup_handler(struct tevent_context *ev,
+                                  struct tevent_fd *fde, uint16_t flags,
+                                  void *private_data)
+{
+       struct eventd_startup_state *state =
+               (struct eventd_startup_state *)private_data;
+       unsigned int data;
+       ssize_t num_read;
+
+       num_read = sys_read(state->fd, &data, sizeof(data));
+       if (num_read == sizeof(data)) {
+               if (data == 0) {
+                       state->ret = 0;
+               } else {
+                       state->ret = EIO;
+               }
+       } else if (num_read == 0) {
+               state->ret = EPIPE;
+       } else if (num_read == -1) {
+               state->ret = errno;
+       } else {
+               state->ret = EINVAL;
+       }
+
+       state->done = true;
+}
+
+
+static int wait_for_daemon_startup(struct tevent_context *ev,
+                                  int fd)
+{
+       TALLOC_CTX *mem_ctx;
+       struct tevent_timer *timer;
+       struct tevent_fd *fde;
+       struct eventd_startup_state state = {
+               .done = false,
+               .ret = 0,
+               .fd = fd,
+       };
+
+       mem_ctx = talloc_new(ev);
+       if (mem_ctx == NULL) {
+               return ENOMEM;
+       }
+
+       timer = tevent_add_timer(ev,
+                                mem_ctx,
+                                tevent_timeval_current_ofs(10, 0),
+                                eventd_startup_timeout_handler,
+                                &state);
+       if (timer == NULL) {
+               talloc_free(mem_ctx);
+               return ENOMEM;
+       }
+
+       fde = tevent_add_fd(ev,
+                           mem_ctx,
+                           fd,
+                           TEVENT_FD_READ,
+                           eventd_startup_handler,
+                           &state);
+       if (fde == NULL) {
+               talloc_free(mem_ctx);
+               return ENOMEM;
+       }
+
+       while (! state.done) {
+               tevent_loop_once(ev);
+       }
+
+       talloc_free(mem_ctx);
+
+       return state.ret;
+}
+
+
 /*
  * Start and stop event daemon
  */
@@ -148,7 +242,7 @@ int ctdb_start_eventd(struct ctdb_context *ctdb)
                return -1;
        }
 
-       argv = talloc_array(ectx, const char *, 4);
+       argv = talloc_array(ectx, const char *, 6);
        if (argv == NULL) {
                close(fd[0]);
                close(fd[1]);
@@ -158,9 +252,11 @@ int ctdb_start_eventd(struct ctdb_context *ctdb)
        argv[0] = ectx->path;
        argv[1] = "-P";
        argv[2] = talloc_asprintf(argv, "%d", ctdb->ctdbd_pid);
-       argv[3] = NULL;
+       argv[3] = "-S";
+       argv[4] = talloc_asprintf(argv, "%d", fd[1]);
+       argv[5] = NULL;
 
-       if (argv[2] == NULL) {
+       if (argv[2] == NULL || argv[4] == NULL) {
                close(fd[0]);
                close(fd[1]);
                talloc_free(argv);
@@ -190,6 +286,14 @@ int ctdb_start_eventd(struct ctdb_context *ctdb)
        talloc_free(argv);
        close(fd[1]);
 
+       ret = wait_for_daemon_startup(ctdb->ev, fd[0]);
+       if (ret != 0) {
+               ctdb_kill(ctdb, pid, SIGKILL);
+               close(fd[0]);
+               D_ERR("Failed to initialize event daemon (%d)\n", ret);
+               return -1;
+       }
+
        ectx->eventd_fde = tevent_add_fd(ctdb->ev, ectx, fd[0],
                                         TEVENT_FD_READ,
                                         eventd_dead_handler, ectx);