ctdb-takeover: Recovery daemon no longer passes fail callback
author Martin Schwenke <martin@meltin.net>
Tue, 3 May 2016 05:35:08 +0000 (15:35 +1000)
committer Amitay Isaacs <amitay@samba.org>
Fri, 13 May 2016 11:47:17 +0000 (13:47 +0200)
Banning is now handled by the takeover code sending banning credit
messages.

This commit makes a change in behaviour quite obvious.  Takeover runs
were initiated from several locations in the code but banning was only
done from one of these locations.  Now banning can be done from any
failed takeover run.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/include/ctdb_private.h
ctdb/server/ctdb_recoverd.c
ctdb/server/ctdb_takeover.c

index f8889e025a820d2f1c7fb3c45a2c3164a8238f0e..a2f6dfce76c7430132671ebdbd66c49b197d296b 100644 (file)
@@ -975,8 +975,7 @@ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses);
 
 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
-                     uint32_t *force_rebalance_nodes,
-                     client_async_callback fail_callback, void *callback_data);
+                     uint32_t *force_rebalance_nodes);
 
 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
                                TDB_DATA indata);
index f3fea02a575fc3101a8199b68f58c4e7f4460711..ba57028a6617be308b4487280c531cfa6d1967b9 100644 (file)
@@ -1644,25 +1644,6 @@ static int sync_recovery_lock_file_across_cluster(struct ctdb_recoverd *rec)
        return 0;
 }
 
-
-/*
- * this callback is called for every node that failed to execute ctdb_takeover_run()
- * and set flag to re-run takeover run.
- */
-static void takeover_fail_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
-{
-       DEBUG(DEBUG_ERR, ("Node %u failed the takeover run\n", node_pnn));
-
-       if (callback_data != NULL) {
-               struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd);
-
-               DEBUG(DEBUG_ERR, ("Setting node %u as recovery fail culprit\n", node_pnn));
-
-               ctdb_set_culprit(rec, node_pnn);
-       }
-}
-
-
 static void ban_misbehaving_nodes(struct ctdb_recoverd *rec, bool *self_ban)
 {
        struct ctdb_context *ctdb = rec->ctdb;
@@ -1693,8 +1674,7 @@ static void ban_misbehaving_nodes(struct ctdb_recoverd *rec, bool *self_ban)
 }
 
 static bool do_takeover_run(struct ctdb_recoverd *rec,
-                           struct ctdb_node_map_old *nodemap,
-                           bool banning_credits_on_fail)
+                           struct ctdb_node_map_old *nodemap)
 {
        uint32_t *nodes = NULL;
        struct ctdb_disable_message dtr;
@@ -1747,9 +1727,7 @@ static bool do_takeover_run(struct ctdb_recoverd *rec,
        }
 
        ret = ctdb_takeover_run(rec->ctdb, nodemap,
-                               rec->force_rebalance_nodes,
-                               takeover_fail_callback,
-                               banning_credits_on_fail ? rec : NULL);
+                               rec->force_rebalance_nodes);
 
        /* Reenable takeover runs and IP checks on other nodes */
        dtr.timeout = 0;
@@ -2226,7 +2204,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
                goto fail;
        }
 
-       do_takeover_run(rec, nodemap, false);
+       do_takeover_run(rec, nodemap);
 
        /* execute the "recovered" event script on all nodes */
        ret = run_recovered_eventscript(rec, nodemap, "do_recovery");
@@ -2686,7 +2664,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb,
        current = rec->reallocate_requests;
        rec->reallocate_requests = NULL;
 
-       if (do_takeover_run(rec, rec->nodemap, false)) {
+       if (do_takeover_run(rec, rec->nodemap)) {
                ret = ctdb_get_pnn(ctdb);
        } else {
                ret = -1;
@@ -3923,7 +3901,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
                 * If takeover run fails repeatedly, the node would get
                 * banned.
                 */
-               do_takeover_run(rec, nodemap, true);
+               do_takeover_run(rec, nodemap);
        }
 }
 
index be4b59a1f3e33f837bcbb27f14581079734109eb..5db7092e443ed46fec73c02c7b8338e4c3a614b7 100644 (file)
@@ -1668,8 +1668,7 @@ static void takeover_run_process_failures(struct ctdb_context *ctdb,
  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
  */
 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
-                     uint32_t *force_rebalance_nodes,
-                     client_async_callback fail_callback, void *callback_data)
+                     uint32_t *force_rebalance_nodes)
 {
        int i, ret;
        struct ctdb_public_ip ip;