prevent a deadly embrace between smbd and ctdbd by moving the calling
author Andrew Tridgell <tridge@samba.org>
Sun, 11 Nov 2007 23:53:11 +0000 (10:53 +1100)
committer Andrew Tridgell <tridge@samba.org>
Sun, 11 Nov 2007 23:53:11 +0000 (10:53 +1100)
of the startup event scripts after the point where recovery has
started and the node is in normal operation

This makes the 'startup' script just a special type of the 'monitor'
script which is called first

include/ctdb_private.h
server/ctdb_daemon.c
server/ctdb_monitor.c

index 7b98683e9ffe22244a2ae1f9e89a540cc88b52d7..cb76bb0074cb68a6a69dbf98e931859b0ebe5d6a 100644 (file)
@@ -366,6 +366,7 @@ struct ctdb_context {
        const char *event_script_dir;
        const char *default_public_interface;
        pid_t recoverd_pid;
+       bool done_startup;
 };
 
 struct ctdb_db_context {
index 671a7e8be2e40bd71c050432d8102ae671f63d61..8f66ade92767432452976051b89d37eec92762a5 100644 (file)
@@ -68,13 +68,8 @@ static void print_exit_message(void)
 
 
 /* called when the "startup" event script has finished */
-static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
+static void ctdb_start_transport(struct ctdb_context *ctdb)
 {
-       if (status != 0) {
-               DEBUG(0,("startup event failed!\n"));
-               ctdb_fatal(ctdb, "startup event script failed");                
-       }
-
        /* start the transport running */
        if (ctdb->methods->start(ctdb) != 0) {
                DEBUG(0,("transport failed to start!\n"));
@@ -664,12 +659,8 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
        /* release any IPs we hold from previous runs of the daemon */
        ctdb_release_all_ips(ctdb);
 
-       ret = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, 
-                                        ctdb_start_transport, NULL, "startup");
-       if (ret != 0) {
-               DEBUG(0,("Failed startup event script\n"));
-               return -1;
-       }
+       /* start the transport going */
+       ctdb_start_transport(ctdb);
 
        /* go into a wait loop to allow other nodes to complete */
        event_loop_wait(ctdb->ev);
index c96099e76c8e5ae116c03af23e187ccc9e4df549..52ecc7c7136eff33e8ffec7e315dd509922acf3f 100644 (file)
@@ -137,6 +137,31 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
 }
 
 
+/*
+  completion handler for the "startup" event script. A zero status
+  marks startup as done; any other status is only logged. In both
+  cases the next ctdb_check_health run is queued before returning
+ */
+static void ctdb_startup_callback(struct ctdb_context *ctdb, int status, void *p)
+{
+       if (status == 0) {
+               DEBUG(0,("startup event OK - enabling monitoring\n"));
+               ctdb->done_startup = true;
+       } else {
+               DEBUG(0,("startup event failed\n"));
+       }
+
+       /* once startup is done, check health straight away; otherwise
+          wait one monitor interval before the next attempt
+          (ctdb_check_health launches "startup" while done_startup is unset) */
+       event_add_timed(ctdb->ev, ctdb->monitor_context,
+                       ctdb->done_startup
+                       ? timeval_zero()
+                       : timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
+                       ctdb_check_health, ctdb);
+}
+
+
 /*
   see if the event scripts think we are healthy
  */
@@ -146,16 +171,25 @@ static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int ret;
 
-       if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED) {
+       if (ctdb->monitoring_mode == CTDB_MONITORING_DISABLED && ctdb->done_startup) {
                event_add_timed(ctdb->ev, ctdb->monitor_context,
                                timeval_current_ofs(ctdb->tunable.monitor_interval, 0), 
                                ctdb_check_health, ctdb);
                return;
        }
        
-       ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
-                                        ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
+       if (!ctdb->done_startup) {
+               ret = ctdb_event_script_callback(ctdb, 
+                                                timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                                ctdb->monitor_context, ctdb_startup_callback, 
+                                                ctdb, "startup");
+       } else {
+               ret = ctdb_event_script_callback(ctdb, 
+                                                timeval_current_ofs(ctdb->tunable.script_timeout, 0),
+                                                ctdb->monitor_context, ctdb_health_callback, 
+                                                ctdb, "monitor");
+       }
+
        if (ret != 0) {
                DEBUG(0,("Unable to launch monitor event script\n"));
                event_add_timed(ctdb->ev, ctdb->monitor_context,