ctdb-recovery: Factor out existing database recovery code
Author:     Amitay Isaacs <amitay@gmail.com>
AuthorDate: Thu, 17 Sep 2015 06:00:47 +0000 (16:00 +1000)
Commit:     Amitay Isaacs <amitay@samba.org>
CommitDate: Wed, 7 Oct 2015 12:53:29 +0000 (14:53 +0200)
Signed-off-by: Amitay Isaacs <amitay@gmail.com>
Reviewed-by: Martin Schwenke <martin@meltin.net>
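
For orientation, a minimal sketch of the resulting control flow, with simplified
stand-in types (the real functions take the ctdb context, nodemap, vnnmap and
dbmap arguments shown in the diff below; this is not part of the original
commit message):

    /* Illustrative skeleton only: stand-in struct replaces struct ctdb_recoverd. */
    #include <stdio.h>

    struct recoverd {
            int need_recovery;
    };

    /* Stand-in for the factored-out helper: set recovery mode active, run
     * "startrecovery", start/commit recovery transactions, pull/push the
     * databases, update vnnmap and recmaster, set recovery mode normal. */
    static int db_recovery_serial(struct recoverd *rec)
    {
            printf("serial database recovery steps run here\n");
            return 0;
    }

    /* do_recovery() keeps the setup work (recovery lock, creating missing
     * databases, db priorities, reclock sync) and delegates the database
     * recovery itself to the helper. */
    static int do_recovery(struct recoverd *rec)
    {
            rec->need_recovery = 1;        /* if recovery fails, force it again */
            /* ... take recovery lock, create missing local/remote databases ... */
            if (db_recovery_serial(rec) != 0) {
                    return -1;             /* corresponds to the old "goto fail" path */
            }
            /* ... reload public IPs and finish recovery (not shown in this hunk) ... */
            return 0;
    }

    int main(void)
    {
            struct recoverd rec = { .need_recovery = 0 };
            return do_recovery(&rec);
    }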
ctdb/server/ctdb_recoverd.c

index ac920d57d642357bcedefdd28997cfb803167421..4527e010ec19e12641a5023f40b20ef2257ed9ef 100644
@@ -1799,129 +1799,29 @@ done:
        return ok;
 }
 
-
-/*
-  we are the recmaster, and recovery is needed - start a recovery run
- */
-static int do_recovery(struct ctdb_recoverd *rec, 
-                      TALLOC_CTX *mem_ctx, uint32_t pnn,
-                      struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
+static int db_recovery_serial(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx,
+                             uint32_t pnn, struct ctdb_node_map *nodemap,
+                             struct ctdb_vnn_map *vnnmap,
+                             struct ctdb_dbid_map *dbmap)
 {
        struct ctdb_context *ctdb = rec->ctdb;
-       int i, j, ret;
        uint32_t generation;
-       struct ctdb_dbid_map *dbmap;
        TDB_DATA data;
        uint32_t *nodes;
-       struct timeval start_time;
-       uint32_t culprit = (uint32_t)-1;
-       bool self_ban;
-
-       DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
-
-       /* if recovery fails, force it again */
-       rec->need_recovery = true;
-
-       if (!ctdb_op_begin(rec->recovery)) {
-               return -1;
-       }
-
-       if (rec->election_timeout) {
-               /* an election is in progress */
-               DEBUG(DEBUG_ERR, ("do_recovery called while election in progress - try again later\n"));
-               goto fail;
-       }
-
-       ban_misbehaving_nodes(rec, &self_ban);
-       if (self_ban) {
-               DEBUG(DEBUG_NOTICE, ("This node was banned, aborting recovery\n"));
-               goto fail;
-       }
-
-        if (ctdb->recovery_lock_file != NULL) {
-               if (ctdb_recovery_have_lock(ctdb)) {
-                       DEBUG(DEBUG_NOTICE, ("Already holding recovery lock\n"));
-               } else {
-                       start_time = timeval_current();
-                       DEBUG(DEBUG_NOTICE, ("Attempting to take recovery lock (%s)\n",
-                                            ctdb->recovery_lock_file));
-                       if (!ctdb_recovery_lock(ctdb)) {
-                               if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
-                                       /* If ctdb is trying first recovery, it's
-                                        * possible that current node does not know
-                                        * yet who the recmaster is.
-                                        */
-                                       DEBUG(DEBUG_ERR, ("Unable to get recovery lock"
-                                                         " - retrying recovery\n"));
-                                       goto fail;
-                               }
-
-                               DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
-                                                "and ban ourself for %u seconds\n",
-                                                ctdb->tunable.recovery_ban_period));
-                               ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
-                               goto fail;
-                       }
-                       ctdb_ctrl_report_recd_lock_latency(ctdb,
-                                                          CONTROL_TIMEOUT(),
-                                                          timeval_elapsed(&start_time));
-                       DEBUG(DEBUG_NOTICE,
-                             ("Recovery lock taken successfully by recovery daemon\n"));
-               }
-       }
-
-       DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node));
-
-       /* get a list of all databases */
-       ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn));
-               goto fail;
-       }
-
-       /* we do the db creation before we set the recovery mode, so the freeze happens
-          on all databases we will be dealing with. */
-
-       /* verify that we have all the databases any other node has */
-       ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n"));
-               goto fail;
-       }
-
-       /* verify that all other nodes have all our databases */
-       ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n"));
-               goto fail;
-       }
-       DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n"));
-
-       /* update the database priority for all remote databases */
-       ret = update_db_priority_on_remote_nodes(ctdb, nodemap, pnn, dbmap, mem_ctx);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, (__location__ " Unable to set db priority on remote nodes\n"));
-       }
-       DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated db priority for all databases\n"));
-
-
-       /* update all other nodes to use the same setting for reclock files
-          as the local recovery master.
-       */
-       sync_recovery_lock_file_across_cluster(rec);
+       int ret, i, j;
 
        /* set recovery mode to active on all nodes */
        ret = set_recovery_mode(ctdb, rec, nodemap, CTDB_RECOVERY_ACTIVE);
        if (ret != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to active on cluster\n"));
-               goto fail;
+               return -1;
        }
 
        /* execute the "startrecovery" event script on all nodes */
        ret = run_startrecovery_eventscript(rec, nodemap);
        if (ret!=0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event on cluster\n"));
-               goto fail;
+               return -1;
        }
 
        /*
@@ -1938,7 +1838,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
                                DEBUG(DEBUG_WARNING, (__location__ "Unable to update flags on inactive node %d\n", i));
                        } else {
                                DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes for node %d\n", i));
-                               goto fail;
+                               return -1;
                        }
                }
        }
@@ -1962,7 +1862,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
        ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, vnnmap);
        if (ret != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to set vnnmap for node %u\n", pnn));
-               goto fail;
+               return -1;
        }
 
        /* Database generations are updated when the transaction is commited to
@@ -1990,7 +1890,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
                                        NULL) != 0) {
                        DEBUG(DEBUG_ERR,("Failed to cancel recovery transaction\n"));
                }
-               goto fail;
+               return -1;
        }
 
        DEBUG(DEBUG_NOTICE,(__location__ " started transactions on all nodes\n"));
@@ -2002,7 +1902,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
                                       pnn, nodemap, generation);
                if (ret != 0) {
                        DEBUG(DEBUG_ERR, (__location__ " Failed to recover database 0x%x\n", dbmap->dbs[i].dbid));
-                       goto fail;
+                       return -1;
                }
        }
 
@@ -2015,17 +1915,16 @@ static int do_recovery(struct ctdb_recoverd *rec,
                                        NULL, NULL,
                                        NULL) != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
-               goto fail;
+               return -1;
        }
 
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - committed databases\n"));
-       
 
        /* update the capabilities for all nodes */
        ret = update_capabilities(rec, nodemap);
        if (ret!=0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
-               goto fail;
+               return -1;
        }
 
        /* build a new vnn map with all the currently active and
@@ -2060,13 +1959,13 @@ static int do_recovery(struct ctdb_recoverd *rec,
                vnnmap->map = talloc_realloc(vnnmap, vnnmap->map, uint32_t, vnnmap->size);
                CTDB_NO_MEMORY(ctdb, vnnmap->map);
                vnnmap->map[0] = pnn;
-       }       
+       }
 
        /* update to the new vnnmap on all nodes */
        ret = update_vnnmap_on_all_nodes(ctdb, nodemap, pnn, vnnmap, mem_ctx);
        if (ret != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to update vnnmap on all nodes\n"));
-               goto fail;
+               return -1;
        }
 
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated vnnmap\n"));
@@ -2075,7 +1974,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
        ret = set_recovery_master(ctdb, nodemap, pnn);
        if (ret!=0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery master\n"));
-               goto fail;
+               return -1;
        }
 
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated recmaster\n"));
@@ -2084,11 +1983,126 @@ static int do_recovery(struct ctdb_recoverd *rec,
        ret = set_recovery_mode(ctdb, rec, nodemap, CTDB_RECOVERY_NORMAL);
        if (ret != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode to normal on cluster\n"));
-               goto fail;
+               return -1;
        }
 
        DEBUG(DEBUG_NOTICE, (__location__ " Recovery - disabled recovery mode\n"));
 
+       return 0;
+}
+
+/*
+  we are the recmaster, and recovery is needed - start a recovery run
+ */
+static int do_recovery(struct ctdb_recoverd *rec,
+                      TALLOC_CTX *mem_ctx, uint32_t pnn,
+                      struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
+{
+       struct ctdb_context *ctdb = rec->ctdb;
+       int i, ret;
+       struct ctdb_dbid_map *dbmap;
+       struct timeval start_time;
+       uint32_t culprit = (uint32_t)-1;
+       bool self_ban;
+
+       DEBUG(DEBUG_NOTICE, (__location__ " Starting do_recovery\n"));
+
+       /* if recovery fails, force it again */
+       rec->need_recovery = true;
+
+       if (!ctdb_op_begin(rec->recovery)) {
+               return -1;
+       }
+
+       if (rec->election_timeout) {
+               /* an election is in progress */
+               DEBUG(DEBUG_ERR, ("do_recovery called while election in progress - try again later\n"));
+               goto fail;
+       }
+
+       ban_misbehaving_nodes(rec, &self_ban);
+       if (self_ban) {
+               DEBUG(DEBUG_NOTICE, ("This node was banned, aborting recovery\n"));
+               goto fail;
+       }
+
+        if (ctdb->recovery_lock_file != NULL) {
+               if (ctdb_recovery_have_lock(ctdb)) {
+                       DEBUG(DEBUG_NOTICE, ("Already holding recovery lock\n"));
+               } else {
+                       start_time = timeval_current();
+                       DEBUG(DEBUG_NOTICE, ("Attempting to take recovery lock (%s)\n",
+                                            ctdb->recovery_lock_file));
+                       if (!ctdb_recovery_lock(ctdb)) {
+                               if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
+                                       /* If ctdb is trying first recovery, it's
+                                        * possible that current node does not know
+                                        * yet who the recmaster is.
+                                        */
+                                       DEBUG(DEBUG_ERR, ("Unable to get recovery lock"
+                                                         " - retrying recovery\n"));
+                                       goto fail;
+                               }
+
+                               DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
+                                                "and ban ourself for %u seconds\n",
+                                                ctdb->tunable.recovery_ban_period));
+                               ctdb_ban_node(rec, pnn, ctdb->tunable.recovery_ban_period);
+                               goto fail;
+                       }
+                       ctdb_ctrl_report_recd_lock_latency(ctdb,
+                                                          CONTROL_TIMEOUT(),
+                                                          timeval_elapsed(&start_time));
+                       DEBUG(DEBUG_NOTICE,
+                             ("Recovery lock taken successfully by recovery daemon\n"));
+               }
+       }
+
+       DEBUG(DEBUG_NOTICE, (__location__ " Recovery initiated due to problem with node %u\n", rec->last_culprit_node));
+
+       /* get a list of all databases */
+       ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Unable to get dbids from node :%u\n", pnn));
+               goto fail;
+       }
+
+       /* we do the db creation before we set the recovery mode, so the freeze happens
+          on all databases we will be dealing with. */
+
+       /* verify that we have all the databases any other node has */
+       ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Unable to create missing local databases\n"));
+               goto fail;
+       }
+
+       /* verify that all other nodes have all our databases */
+       ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Unable to create missing remote databases\n"));
+               goto fail;
+       }
+       DEBUG(DEBUG_NOTICE, (__location__ " Recovery - created remote databases\n"));
+
+       /* update the database priority for all remote databases */
+       ret = update_db_priority_on_remote_nodes(ctdb, nodemap, pnn, dbmap, mem_ctx);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, (__location__ " Unable to set db priority on remote nodes\n"));
+       }
+       DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated db priority for all databases\n"));
+
+
+       /* update all other nodes to use the same setting for reclock files
+          as the local recovery master.
+       */
+       sync_recovery_lock_file_across_cluster(rec);
+
+       ret = db_recovery_serial(rec, mem_ctx, pnn, nodemap, vnnmap, dbmap);
+       if (ret != 0) {
+               goto fail;
+       }
+
        /* Fetch known/available public IPs from each active node */
        ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
        if (ret != 0) {