add a new control for explicitly cancelling recovery transactions, i.e. the
author: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 12 Oct 2009 05:48:05 +0000 (16:48 +1100)
committer: Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 12 Oct 2009 05:48:05 +0000 (16:48 +1100)
transactions we start across all tdb databases during the recovery.

this allows us to properly clean up and delete these tdb transactions on a
recovery failure.

include/ctdb_private.h
server/ctdb_freeze.c
server/ctdb_recoverd.c

index cc5729a1171f3d9b44248d05362ac4f1ed9eb7ee..acaaf5fef95d71ce75ed9a4009bbe945eb4dc7e4 100644 (file)
@@ -601,6 +601,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
                    CTDB_CONTROL_GET_BAN_STATE           = 110,
                    CTDB_CONTROL_SET_DB_PRIORITY         = 111,
                    CTDB_CONTROL_GET_DB_PRIORITY         = 112,
+                   CTDB_CONTROL_TRANSACTION_CANCEL      = 113,
 };     
 
 /*
@@ -1401,6 +1402,7 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
 
 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id);
 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id);
+int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb);
 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata);
 
 
index 2cc39aa274af438fd8e160b604b2745462e86a83..da7272fddaa277914095b59d66abe916c0dbd92c 100644 (file)
@@ -419,6 +419,31 @@ int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
        return 0;
 }
 
+/*
+  cancel the recovery transaction on every database in ctdb->db_list; per-db failures are only logged and the control always returns 0
+ */
+int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
+{
+       struct ctdb_db_context *ctdb_db;
+
+       DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
+
+       for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+               tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK); /* TDB_NOLOCK is set only around the cancel and removed again below */
+
+               if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
+                       DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
+                       /* not a fatal error: log it and carry on with the remaining databases */
+               }
+
+               tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+       }
+
+       ctdb->freeze_transaction_started = false; /* cleared unconditionally, even if a cancel above failed */
+
+       return 0;
+}
+
 /*
   commit transactions on all databases
  */
index a7d07a8b6d1d52fdf9d0e6a165a4456d80e57ad9..d759856cd97ebee2a151b5ef644de2cd0dbcff3d 100644 (file)
@@ -254,6 +254,14 @@ static void set_recmode_fail_callback(struct ctdb_context *ctdb, uint32_t node_p
        ctdb_set_culprit_count(rec, node_pnn, rec->nodemap->num);
 }
 
+static void transaction_start_fail_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
+{ /* failure callback for the CTDB_CONTROL_TRANSACTION_START async control in do_recovery; ctdb, res and outdata are unused here */
+       struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd); /* callback_data is the 'rec' pointer that do_recovery passes in */
+
+       DEBUG(DEBUG_ERR,("Failed to start recovery transaction on node %u. Set it as ban culprit for %d credits\n", node_pnn, rec->nodemap->num));
+       ctdb_set_culprit_count(rec, node_pnn, rec->nodemap->num); /* charge the failing node rec->nodemap->num credits, as reported in the log line above */
+}
+
 /*
   change recovery mode on all nodes
  */
@@ -1334,9 +1342,18 @@ static int do_recovery(struct ctdb_recoverd *rec,
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
                                        nodes, 0,
                                        CONTROL_TIMEOUT(), false, data,
-                                       NULL, NULL,
-                                       NULL) != 0) {
+                                       NULL,
+                                       transaction_start_fail_callback,
+                                       rec) != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Unable to start transactions. Recovery failed.\n"));
+               if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_CANCEL,
+                                       nodes, 0,
+                                       CONTROL_TIMEOUT(), false, tdb_null,
+                                       NULL,
+                                       NULL,
+                                       NULL) != 0) {
+                       DEBUG(DEBUG_ERR,("Failed to cancel recovery transaction\n"));
+               }
                return -1;
        }