recoverd: avoid triggering a full recovery if just some ip allocation
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 10 Jan 2011 05:51:56 +0000 (16:51 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 10 Jan 2011 05:51:56 +0000 (16:51 +1100)
has failed.
We don't need to rebuild the databases in this situation; we just
need to try again to sort out the IP address allocations.

server/ctdb_recoverd.c

index b2c08ced0c5bc3b5a2cf520f79e47d9d7e524a79..b82f0e77f5418e8b36d63c64631df41c406dffec 100644 (file)
@@ -1624,13 +1624,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                 culprit));
+               rec->need_takeover_run = true;
                return -1;
        }
        rec->need_takeover_run = false;
        ret = ctdb_takeover_run(ctdb, nodemap);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-               return -1;
+               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+               rec->need_takeover_run = true;
        }
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2045,8 +2046,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
        if (ret == 0) {
                ret = ctdb_takeover_run(ctdb, rec->nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-                                        culprit));
+                       DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
                        rec->need_takeover_run = true;
                }
        }
@@ -3397,8 +3397,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                if (ret != 0) {
                        DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                         culprit));
-                       ctdb_set_culprit(rec, culprit);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       rec->need_takeover_run = true;
                        return;
                }
 
@@ -3413,9 +3412,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-                       ctdb_set_culprit(rec, ctdb->pnn);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
                        return;
                }