From 9fea542d0acd222aab0070827a0de453c13edaab Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 10 Jan 2011 16:51:56 +1100 Subject: [PATCH] recoverd: avoid triggering a full recovery if just some ip allocation has failed. We don't need to rebuild the databases in this situation; we just need to try again to sort out the IP address allocations. (cherry picked from commit 044c398ffea23d36ee033c8ddf07d11028197346) --- server/ctdb_recoverd.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index 45d387d6..75e839cc 100644 --- a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -1616,13 +1616,14 @@ static int do_recovery(struct ctdb_recoverd *rec, if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", culprit)); + rec->need_takeover_run = true; return -1; } rec->need_takeover_run = false; ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n")); - return -1; + DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. 
ctdb_takeover_run() failed.\n")); + rec->need_takeover_run = true; } DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n")); @@ -2037,8 +2038,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb if (ret == 0) { ret = ctdb_takeover_run(ctdb, rec->nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", - culprit)); + DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n")); rec->need_takeover_run = true; } } @@ -3391,8 +3391,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n", culprit)); - ctdb_set_culprit(rec, culprit); - do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); + rec->need_takeover_run = true; return; } @@ -3407,9 +3406,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n")); - ctdb_set_culprit(rec, ctdb->pnn); - do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap); + DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n")); return; } -- 2.34.1