when we change state between healthy/unhealthy, make sure we ask the recovery
authorRonnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 14 Oct 2009 00:59:16 +0000 (11:59 +1100)
committerRonnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 14 Oct 2009 00:59:16 +0000 (11:59 +1100)
master to perform an explicit ip reallocation.

This is more reliable and faster than having the recovery daemon track these
changes, and since we now have an explicit method to ask the recovery daemon
to perform an explicit ip reallocation, we should use this.

include/ctdb.h
include/ctdb_private.h
server/ctdb_monitor.c
tools/ctdb.c

index b9a7685f523b66e71d7fd5ff6ef46779b67e2809..abe09a3f966dfd2464336764212fa3d1158a510f 100644 (file)
@@ -106,6 +106,11 @@ struct ctdb_call_info {
 */
 #define CTDB_SRVID_DISABLE_IP_CHECK  0xFC00000000000000LL
 
+/* A dummy port used for sending back ipreallocate responses to the main
+   daemon
+*/
+#define CTDB_SRVID_TAKEOVER_RUN_RESPONSE  0xFD00000000000000LL
+
 /* used on the domain socket, send a pdu to the local daemon */
 #define CTDB_CURRENT_NODE     0xF0000001
 /* send a broadcast to all nodes in the cluster, active or not */
index acaaf5fef95d71ce75ed9a4009bbe945eb4dc7e4..af77556ebbda5b1f4ee9fdd36981f4499e8994f7 100644 (file)
@@ -45,6 +45,14 @@ struct rd_memdump_reply {
        uint64_t srvid;
 };
 
+/*
+  description for a TAKEOVER_RUN message reply address
+ */
+struct takeover_run_reply {
+       uint32_t pnn;
+       uint64_t srvid;
+};
+
 /*
   a tcp connection description
  */
index 4a554d45552496a65993cc9f4f10144c58c28825..fc96fd71a2a99b59f0b2d6ba6838e9df9ddd9f90 100644 (file)
@@ -110,10 +110,19 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
        TDB_DATA data;
        struct ctdb_node_flag_change c;
        uint32_t next_interval;
+       int ret;
+       TDB_DATA rddata;
+       struct takeover_run_reply rd;
 
        c.pnn = ctdb->pnn;
        c.old_flags = node->flags;
 
+       rd.pnn   = ctdb->pnn;
+       rd.srvid = CTDB_SRVID_TAKEOVER_RUN_RESPONSE;
+
+       rddata.dptr = (uint8_t *)&rd;
+       rddata.dsize = sizeof(rd);
+
        if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
                DEBUG(DEBUG_NOTICE,("monitor event failed - disabling node\n"));
                node->flags |= NODE_FLAGS_UNHEALTHY;
@@ -124,12 +133,28 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
                }
 
                ctdb_run_notification_script(ctdb, "unhealthy");
+
+               /* ask the recmaster to reallocate all addresses */
+               DEBUG(DEBUG_ERR,("Node became UNHEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
+               ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+               }
+
        } else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
                DEBUG(DEBUG_NOTICE,("monitor event OK - node re-enabled\n"));
                node->flags &= ~NODE_FLAGS_UNHEALTHY;
                ctdb->monitor->next_interval = 1;
 
                ctdb_run_notification_script(ctdb, "healthy");
+
+               /* ask the recmaster to reallocate all addresses */
+               DEBUG(DEBUG_ERR,("Node became HEALTHY. Ask recovery master %u to perform ip reallocation\n", ctdb->recovery_master));
+               ret = ctdb_daemon_send_message(ctdb, ctdb->recovery_master, CTDB_SRVID_TAKEOVER_RUN, rddata);
+               if (ret != 0) {
+                       DEBUG(DEBUG_ERR,(__location__ " Failed to send ip takeover run request message to %u\n", ctdb->recovery_master));
+               }
+
        }
 
        next_interval = ctdb->monitor->next_interval;
index ec70fe8c11281857cfc4bd3766c48ce3ca23294a..2f78ebe76fbfc11cb8bae026ab00e31183eaf83f 100644 (file)
@@ -1619,7 +1619,7 @@ static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char
 {
        int i, ret;
        TDB_DATA data;
-       struct rd_memdump_reply rd;
+       struct takeover_run_reply rd;
        uint32_t recmaster;
        struct ctdb_node_map *nodemap=NULL;
        int retries=0;