recoverd: avoid triggering a full recovery if just some ip allocation
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 10 Jan 2011 05:51:56 +0000 (16:51 +1100)
committer Michael Adam <obnox@samba.org>
Fri, 6 Jun 2014 13:00:40 +0000 (15:00 +0200)
has failed.
We don't need to rebuild the databases in this situation; we just
need to try again to sort out the IP address allocations.

(cherry picked from commit 044c398ffea23d36ee033c8ddf07d11028197346)

server/ctdb_recoverd.c

index 45d387d6e4e0d1c12fc6a064e2191f3044de96bb..75e839cc4dc79df689e6aa1544eb0725d9448e27 100644 (file)
@@ -1616,13 +1616,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                 culprit));
+               rec->need_takeover_run = true;
                return -1;
        }
        rec->need_takeover_run = false;
        ret = ctdb_takeover_run(ctdb, nodemap);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-               return -1;
+               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+               rec->need_takeover_run = true;
        }
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2037,8 +2038,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
        if (ret == 0) {
                ret = ctdb_takeover_run(ctdb, rec->nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-                                        culprit));
+                       DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
                        rec->need_takeover_run = true;
                }
        }
@@ -3391,8 +3391,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                if (ret != 0) {
                        DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                         culprit));
-                       ctdb_set_culprit(rec, culprit);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       rec->need_takeover_run = true;
                        return;
                }
 
@@ -3407,9 +3406,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-                       ctdb_set_culprit(rec, ctdb->pnn);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
                        return;
                }