ctdbd: Add new runstate CTDB_RUNSTATE_FIRST_RECOVERY
author Martin Schwenke <martin@meltin.net>
Thu, 18 Apr 2013 10:30:14 +0000 (20:30 +1000)
committer Amitay Isaacs <amitay@gmail.com>
Fri, 24 May 2013 04:08:07 +0000 (14:08 +1000)
This adds more serialisation to the startup, ensuring that the
"startup" event runs after everything to do with the first recovery
(including the "recovered" event).

Given that it now takes longer to reach the "startup" runstate, the
initscript now only waits until ctdbd reaches "first_recovery" (or later).

Signed-off-by: Martin Schwenke <martin@meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>

common/ctdb_util.c
config/ctdb.init
doc/ctdb.1.xml
include/ctdb_private.h
server/ctdb_daemon.c
server/ctdb_monitor.c
server/ctdb_recover.c
tools/ctdb.c

index 270ad625c194faa917318b98353bf2bce1bf32bf..a910a0cd79c22989aadd3d22067fe041a7d019a4 100644 (file)
@@ -711,6 +711,7 @@ static struct {
        { CTDB_RUNSTATE_UNKNOWN, "UNKNOWN" },
        { CTDB_RUNSTATE_INIT, "INIT" },
        { CTDB_RUNSTATE_SETUP, "SETUP" },
+       { CTDB_RUNSTATE_FIRST_RECOVERY, "FIRST_RECOVERY" },
        { CTDB_RUNSTATE_STARTUP, "STARTUP" },
        { CTDB_RUNSTATE_RUNNING, "RUNNING" },
        { CTDB_RUNSTATE_SHUTDOWN, "SHUTDOWN" },
index 70dcfa642169408d32bf773942764e59ecb8c110..2ceb45ff19191a4bd6e598a4ba975176a0e78d10 100755 (executable)
@@ -220,7 +220,7 @@ wait_until_ready () {
     _timeout="${1:-10}" # default is 10 seconds
 
     _count=0
-    while ! ctdb runstate startup running >/dev/null 2>&1 ; do
+    while ! ctdb runstate first_recovery startup running >/dev/null 2>&1 ; do
        if [ $_count -ge $_timeout ] ; then
            return 1
        fi
index 7242f3aa29afb75c100465aaa3bb574826ff4061..ce83a3efa0bdf229aeaa8589e8fcafa30c03bdfe 100644 (file)
@@ -382,7 +382,7 @@ response from 3 time=0.000114 sec  (2 clients)
       </screen>
     </refsect2>
 
-    <refsect2><title>runstate [setup|startup|running]</title>
+    <refsect2><title>runstate [setup|first_recovery|startup|running]</title>
       <para>
         Print the runstate of the specified node.  Runstates are used
         to serialise important state transitions in CTDB, particularly
index 2698785f0be6dfd6c635408a25555d5d72563da6..eadd9637155e7f4166b7e58c60bf2394d486e516 100644 (file)
@@ -440,6 +440,7 @@ enum ctdb_runstate {
        CTDB_RUNSTATE_UNKNOWN,
        CTDB_RUNSTATE_INIT,
        CTDB_RUNSTATE_SETUP,
+       CTDB_RUNSTATE_FIRST_RECOVERY,
        CTDB_RUNSTATE_STARTUP,
        CTDB_RUNSTATE_RUNNING,
        CTDB_RUNSTATE_SHUTDOWN,
index edbc34c7a1f6814428dc2064f145794134bcfa79..32c30243ea6f42ee50d66611b7665b266be5b092 100644 (file)
@@ -1037,7 +1037,7 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
        }
        ctdb_run_notification_script(ctdb, "setup");
 
-       ctdb_set_runstate(ctdb, CTDB_RUNSTATE_STARTUP);
+       ctdb_set_runstate(ctdb, CTDB_RUNSTATE_FIRST_RECOVERY);
 
        /* tell all other nodes we've just started up */
        ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL,
index 1e556e00d8728459317fddbddb4903642800fc2f..1608804c3084c46b820d5db422156d273b460ce9 100644 (file)
@@ -307,7 +307,6 @@ static void ctdb_wait_until_recovered(struct event_context *ev, struct timed_eve
        }
        ctdb->db_persistent_check_errors = 0;
 
-       DEBUG(DEBUG_NOTICE,(__location__ " Recoveries finished. Running the \"startup\" event.\n"));
        event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
                             timeval_current(),
                             ctdb_check_health, ctdb);
@@ -323,6 +322,14 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int ret = 0;
 
+       if (ctdb->runstate < CTDB_RUNSTATE_STARTUP) {
+               DEBUG(DEBUG_NOTICE,("Not yet in startup runstate. Wait one more second\n"));
+               event_add_timed(ctdb->ev, ctdb->monitor->monitor_context,
+                               timeval_current_ofs(1, 0), 
+                               ctdb_check_health, ctdb);
+               return;
+       }
+       
        if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL ||
            (ctdb->monitor->monitoring_mode == CTDB_MONITORING_DISABLED &&
             ctdb->runstate == CTDB_RUNSTATE_RUNNING)) {
@@ -333,6 +340,7 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        }
        
        if (ctdb->runstate == CTDB_RUNSTATE_STARTUP) {
+               DEBUG(DEBUG_NOTICE,("Recoveries finished. Running the \"startup\" event.\n"));
                ret = ctdb_event_script_callback(ctdb, 
                                                 ctdb->monitor->monitor_context, ctdb_startup_callback, 
                                                 ctdb, false,
index 7fd0ac36ea110e1bf1ee5e86c4ac5c040f4cf7f0..6506f8bbce3be119845345a0537f2a04fcfc561f 100644 (file)
@@ -911,6 +911,10 @@ static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, vo
        talloc_free(state);
 
        gettimeofday(&ctdb->last_recovery_finished, NULL);
+
+       if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
+               ctdb_set_runstate(ctdb, CTDB_RUNSTATE_STARTUP);
+       }
 }
 
 /*
index e21e845073631fb880f9b070b7781f42123626ab..48ca93ba0eecf4a2e8e00fd8c30a3a9a371c4486 100644 (file)
@@ -5847,7 +5847,7 @@ static const struct {
        { "status",          control_status,            true,   false,  "show node status" },
        { "uptime",          control_uptime,            true,   false,  "show node uptime" },
        { "ping",            control_ping,              true,   false,  "ping all nodes" },
-       { "runstate",        control_runstate,          true,   false,  "get/check runstate of a node", "[setup|startup|running]" },
+       { "runstate",        control_runstate,          true,   false,  "get/check runstate of a node", "[setup|first_recovery|startup|running]" },
        { "getvar",          control_getvar,            true,   false,  "get a tunable variable",               "<name>"},
        { "setvar",          control_setvar,            true,   false,  "set a tunable variable",               "<name> <value>"},
        { "listvars",        control_listvars,          true,   false,  "list tunable variables"},