ctdb-recoverd: Abort recovery/takeover if recmaster changes
author: Amitay Isaacs <amitay@gmail.com>
Fri, 8 Sep 2017 01:24:27 +0000 (11:24 +1000)
committer: Martin Schwenke <martins@samba.org>
Tue, 12 Sep 2017 10:23:19 +0000 (12:23 +0200)
Recovery and takeover are run via helpers from the recovery daemon.  While the
helpers are running, it's possible for the current node to lose the election.
If that happens, abort the currently running recovery/takeover helper.

Signed-off-by: Amitay Isaacs <amitay@gmail.com>
Reviewed-by: Martin Schwenke <martin@meltin.net>
ctdb/server/ctdb_recoverd.c

index 9488bc2c8428309ac83603c1f746e1e25f28aaba..2b94fed7478d5736401d79f79ccf2b338c8826c9 100644 (file)
@@ -1026,6 +1026,7 @@ static int helper_run(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx,
        struct tevent_fd *fde;
        const char **args;
        int nargs, ret;
+       uint32_t recmaster = rec->recmaster;
 
        state = talloc_zero(mem_ctx, struct helper_state);
        if (state == NULL) {
@@ -1085,6 +1086,14 @@ static int helper_run(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx,
 
        while (!state->done) {
                tevent_loop_once(rec->ctdb->ev);
+
+               /* If recmaster changes, we have lost election */
+               if (recmaster != rec->recmaster) {
+                       D_ERR("Recmaster changed to %u, aborting %s\n",
+                             rec->recmaster, type);
+                       state->result = 1;
+                       break;
+               }
        }
 
        close(state->fd[0]);