ctdb-recoverd: If obtaining recovery lock fails, try again

author Amitay Isaacs <amitay@gmail.com>

Thu, 25 Sep 2014 07:17:04 +0000 (17:17 +1000)

committer Martin Schwenke <martins@samba.org>

Thu, 25 Sep 2014 12:48:11 +0000 (14:48 +0200)
author Amitay Isaacs <amitay@gmail.com>
Thu, 25 Sep 2014 07:17:04 +0000 (17:17 +1000)
committer Martin Schwenke <martins@samba.org>
Thu, 25 Sep 2014 12:48:11 +0000 (14:48 +0200)
diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c

index 14e6ea85ad567b230984564cf6a0e1335b858379..945b01c4e92d3120ce0138f9dd12eb9419bd1ece 100644 (file)
--- a/ctdb/server/ctdb_recoverd.c
+++ b/ctdb/server/ctdb_recoverd.c
@@ -1815,6 +1815,16 @@ static int do_recovery(struct ctdb_recoverd *rec,
                 DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
                 start_time = timeval_current();
                 if (!ctdb_recovery_lock(ctdb, true)) {
+                       if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
+                               /* If ctdb is attempting its first recovery,
+                                * the current node may not yet know who the
+                                * recmaster is.
+                                */
+                               DEBUG(DEBUG_ERR, ("Unable to get recovery lock"
+                                               " - retrying recovery\n"));
+                               return -1;
+                       }
+
                         DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
                                          "and ban ourself for %u seconds\n",
                                          ctdb->tunable.recovery_ban_period));
@@ -3593,6 +3603,14 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                 return;
         }
  
+       /* get runstate */
+       ret = ctdb_ctrl_get_runstate(ctdb, CONTROL_TIMEOUT(),
+                                    CTDB_CURRENT_NODE, &ctdb->runstate);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, ("Failed to get runstate - retrying\n"));
+               return;
+       }
+
         /* get the current recovery lock file from the server */
         if (update_recovery_lock_file(ctdb) != 0) {
                 DEBUG(DEBUG_ERR,("Failed to update the recovery lock file\n"));
author	Amitay Isaacs <amitay@gmail.com>
	Thu, 25 Sep 2014 07:17:04 +0000 (17:17 +1000)
committer	Martin Schwenke <martins@samba.org>
	Thu, 25 Sep 2014 12:48:11 +0000 (14:48 +0200)