add a context and a timed event so that once we have been in recovery for too long, we drop all our IP addresses

author Ronnie Sahlberg <ronniesahlberg@gmail.com>

Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)

committer Ronnie Sahlberg <ronniesahlberg@gmail.com>

Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h

index 756c62ab7d9b193cc09599c5adf34361f98423ab..9c0640912a21bd7923ac7583040758799e569459 100644 (file)
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -420,6 +420,7 @@ struct ctdb_context {
         uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
         TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
         uint32_t *recd_ping_count;
+       TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
  };
  
  struct ctdb_db_context {
@@ -1422,6 +1423,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr);
  void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
  
  int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
+int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata);
  
  extern int script_log_level;
  
diff --git a/server/ctdb_control.c b/server/ctdb_control.c

index fa38fea33897fbc0223386f08df1362f883fad8b..5f655479ec74d2be50f8143d136587fe6b593c3b 100644 (file)
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -160,13 +160,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
         }
  
         case CTDB_CONTROL_SET_RECMASTER: {
-               CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
-               if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
-                       DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n"));
-                       return -1;
-               }
-               ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
-               return 0;
+               return ctdb_control_set_recmaster(ctdb, opcode, indata);
         }
  
         case CTDB_CONTROL_GET_RECMASTER:
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c

index 8d61704ab768fa503f82149acf79a0838fd8ba14..c8b0ba066abce420520a9d7d3ef1e1e7eab65453 100644 (file)
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -529,6 +529,19 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
         return;
  }
  
+static void
+ctdb_drop_all_ips_event(struct event_context *ev, struct timed_event *te, 
+                              struct timeval t, void *private_data)
+{
+       struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+       DEBUG(DEBUG_INFO,(__location__ " Been in recovery mode for too long. Dropping all IPS\n"));
+       talloc_free(ctdb->release_ips_ctx);
+       ctdb->release_ips_ctx = NULL;
+
+       ctdb_release_all_ips(ctdb);
+}
+
  /*
    set the recovery mode
   */
@@ -542,6 +555,21 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
         struct ctdb_set_recmode_state *state;
         pid_t parent = getpid();
  
+       /* if we enter recovery but stay in recovery for too long
+          we will eventually drop all our ip addresses
+       */
+       if (recmode == CTDB_RECOVERY_NORMAL) {
+               talloc_free(ctdb->release_ips_ctx);
+               ctdb->release_ips_ctx = NULL;
+       } else {
+               talloc_free(ctdb->release_ips_ctx);
+               ctdb->release_ips_ctx = talloc_new(ctdb);
+               CTDB_NO_MEMORY(ctdb, ctdb->release_ips_ctx);
+
+               event_add_timed(ctdb->ev, ctdb->release_ips_ctx, timeval_current_ofs(5,0), ctdb_drop_all_ips_event, ctdb);
+       }
+
+
         if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
                 DEBUG(DEBUG_ERR,("Attempt to change recovery mode to %u when not frozen\n", 
                          recmode));
@@ -1045,3 +1073,15 @@ int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
         return 0;
  }
  
+
+
+int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata)
+{
+       CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+       if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
+               DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n"));
+               return -1;
+       }
+       ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
+       return 0;
+}
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c

index 6533d71bc03f870962835949c9ef90e5403be019..c14afb305624b1b55caec9d6b56d6dfcdaf0aef0 100644 (file)
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -1291,6 +1291,9 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
                         continue;
                 }
+               if (vnn->pnn == ctdb->pnn) {
+                       vnn->pnn = -1;
+               }
                 ctdb_event_script(ctdb, "releaseip %s %s %u",
                                   vnn->iface, 
                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
diff --git a/server/eventscript.c b/server/eventscript.c

index deaf750a33c2078a3309a4d009722d57ffcd0b14..6edd1a4dc6ace9fcf34960717247143be359ba7d 100644 (file)
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -72,13 +72,13 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
         if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
                 /* we guarantee that only some specifically allowed event scripts are run
                    while in recovery */
-               const char *allowed_scripts[] = {"startrecovery", "shutdown" };
+               const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip" };
                 int i;
                 for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
-                       if (strcmp(options, allowed_scripts[i]) == 0) break;
+                       if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
                 }
                 if (i == ARRAY_SIZE(allowed_scripts)) {
-                       DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n",
+                       DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
                                  options));
                         return -1;
                 }
diff --git a/tools/ctdb.c b/tools/ctdb.c

index 5055c2693e95c433c59d22c980b5196e640f003a..5c553e69a9fb26c8690e7f92ab7f5e89da79da02 100644 (file)
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -441,8 +441,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
  static int control_recmaster(struct ctdb_context *ctdb, int argc, const char **argv)
  {
         int ret;
-       uint32_t recmode, recmaster;
-       int mypnn;
+       uint32_t recmaster;
  
         ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.pnn, &recmaster);
         if (ret != 0) {
author	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)
committer	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Wed, 22 Oct 2008 00:04:41 +0000 (11:04 +1100)
include/ctdb_private.h		patch | blob | history
server/ctdb_control.c		patch | blob | history
server/ctdb_recover.c		patch | blob | history
server/ctdb_takeover.c		patch | blob | history
server/eventscript.c		patch | blob | history
tools/ctdb.c		patch | blob | history