recoverd: avoid triggering a full recovery if just some ip allocation
authorRonnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 10 Jan 2011 05:51:56 +0000 (16:51 +1100)
committerRonnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 10 Jan 2011 20:39:56 +0000 (07:39 +1100)
has failed.
We dont need to rebuild the databases in this situation, we just
need to try again to sort out the ip address allocations.

server/ctdb_recoverd.c

index 63ef31c616c6b9078351c235aa33aa86b3024c46..5199fb4d8abaf2d74b9722213911b5f18c70d334 100644 (file)
@@ -1683,13 +1683,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
        if (ret != 0) {
                DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                 culprit));
+               rec->need_takeover_run = true;
                return -1;
        }
        rec->need_takeover_run = false;
        ret = ctdb_takeover_run(ctdb, nodemap);
        if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-               return -1;
+               DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+               rec->need_takeover_run = true;
        }
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2124,8 +2125,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
        if (ret == 0) {
                ret = ctdb_takeover_run(ctdb, rec->nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-                                        culprit));
+                       DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
                        rec->need_takeover_run = true;
                }
        }
@@ -3350,8 +3350,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                if (ret != 0) {
                        DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
                                         culprit));
-                       ctdb_set_culprit(rec, culprit);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       rec->need_takeover_run = true;
                        return;
                }
 
@@ -3366,9 +3365,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
                ret = ctdb_takeover_run(ctdb, nodemap);
                if (ret != 0) {
-                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-                       ctdb_set_culprit(rec, ctdb->pnn);
-                       do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+                       DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
                        return;
                }