Interface monitoring: add an event to trigger every 30 seconds to check that all inter...
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Tue, 6 Sep 2011 07:02:19 +0000 (17:02 +1000)
committer Michael Adam <obnox@samba.org>
Fri, 6 Jun 2014 13:00:38 +0000 (15:00 +0200)
This will make it much easier to root-cause problems such as
S1029023
when an external application deletes the interface while it is still in use by ctdbd.
(cherry picked from commit 9abf9c919a7e6789695490e2c3de56c21b63fa57)

include/ctdb_private.h
server/ctdb_takeover.c

index 3b1500790effa060ee7e33ecf0d4ab625980a676..aceeb8e9c23da3022f806d674e327af857da909f 100644 (file)
@@ -402,6 +402,7 @@ struct ctdb_context {
        uint32_t recovery_mode;
        TALLOC_CTX *tickle_update_context;
        TALLOC_CTX *keepalive_ctx;
+       TALLOC_CTX *check_public_ifaces_ctx;
        struct ctdb_tunable tunable;
        enum ctdb_freeze_mode freeze_mode[NUM_DB_PRIORITIES+1];
        struct ctdb_freeze_handle *freeze_handles[NUM_DB_PRIORITIES+1];
index b51e8f0b79204fbaa5195a1dad00f633706cef6e..ffa222debf19bd89072c996de9ad8d06619e4d1b 100644 (file)
@@ -937,6 +937,51 @@ int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
        return 0;
 }
 
+/*
+  timed event handler that checks that every interface listed for a
+  public address still exists, logs a DEBUG_CRIT message for each one
+  that does not, and then schedules itself to run again
+
+  private_data is the struct ctdb_context; ev, te and t are not used
+*/
+static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te, 
+                                 struct timeval t, void *private_data)
+{
+       struct ctdb_context *ctdb = talloc_get_type(private_data, 
+                                                       struct ctdb_context);
+       struct ctdb_vnn *vnn;
+
+       /* walk every entry on the ctdb->vnn list */
+       for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
+               int i;
+
+               /* vnn->ifaces is walked as a NULL terminated array */
+               /* NOTE(review): assumes vnn->ifaces itself is never NULL - confirm */
+               for (i=0; vnn->ifaces[i] != NULL; i++) {
+                       /* only log the problem, no corrective action is taken here */
+                       if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
+                               DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
+                                       vnn->ifaces[i],
+                                       ctdb_addr_to_str(&vnn->public_address)));
+                       }
+               }
+       }
+
+       /* re-arm: run this check again 30 seconds from now.
+          the result of event_add_timed() is not checked */
+       event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx, 
+               timeval_current_ofs(30, 0), 
+               ctdb_check_interfaces_event, ctdb);
+}
+
+
+/*
+  (re)start the periodic check of the public address interfaces
+
+  replaces ctdb->check_public_ifaces_ctx with a fresh talloc context
+  and schedules the first run of ctdb_check_interfaces_event.
+  always returns 0
+*/
+static int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
+{
+       /* drop any context left over from an earlier call.
+          presumably this also cancels a check that is still pending,
+          since the timed events are added on this context - TODO confirm */
+       if (ctdb->check_public_ifaces_ctx != NULL) {
+               talloc_free(ctdb->check_public_ifaces_ctx);
+               ctdb->check_public_ifaces_ctx = NULL;
+       }
+
+       ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
+       if (ctdb->check_public_ifaces_ctx == NULL) {
+               /* NOTE(review): if ctdb_fatal() can return, the NULL context
+                  is passed to event_add_timed() below - confirm */
+               ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
+       }
+
+       /* first check runs 30 seconds from now; the handler re-arms itself.
+          the result of event_add_timed() is not checked */
+       event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx, 
+               timeval_current_ofs(30, 0), 
+               ctdb_check_interfaces_event, ctdb);
+
+       return 0;
+}
+
+
 /*
   setup the public address lists from a file
 */
@@ -999,6 +1044,9 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
                }
        }
 
+
+       ctdb_start_monitoring_interfaces(ctdb);
+
        talloc_free(lines);
        return 0;
 }