From Wolfgang Mueller
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Tue, 20 Oct 2009 01:59:48 +0000 (12:59 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Tue, 20 Oct 2009 01:59:48 +0000 (12:59 +1100)
Add a tunable so that when event scripts start to hang or time out, we can make the node unhealthy instead of banning it

include/ctdb_private.h
server/ctdb_tunables.c
server/eventscript.c

index af77556ebbda5b1f4ee9fdd36981f4499e8994f7..35430fc50eb56ce89e45762cccb2636c2c9f1601 100644 (file)
@@ -99,6 +99,7 @@ struct ctdb_tunable {
        uint32_t tickle_update_interval;
        uint32_t script_timeout;
        uint32_t script_ban_count; /* ban after this many consec timeouts*/
+       uint32_t script_unhealthy_on_timeout; /* don't ban on timeout; set node unhealthy */
        uint32_t recovery_grace_period;
        uint32_t recovery_ban_period;
        uint32_t database_hash_size;
index 519a7b72fdaeefb9b18008ff7fad89ae67d1b3c7..90918c8c933ea5ac45f62bea3590218536892eb5 100644 (file)
@@ -39,6 +39,7 @@ static const struct {
        { "TickleUpdateInterval",20,  offsetof(struct ctdb_tunable, tickle_update_interval) },
        { "EventScriptTimeout",  60,  offsetof(struct ctdb_tunable, script_timeout) },
        { "EventScriptBanCount",  5,  offsetof(struct ctdb_tunable, script_ban_count) },
+       { "EventScriptUnhealthyOnTimeout", 0, offsetof(struct ctdb_tunable, script_unhealthy_on_timeout) },
        { "RecoveryGracePeriod", 120,  offsetof(struct ctdb_tunable, recovery_grace_period) },
        { "RecoveryBanPeriod",  300,  offsetof(struct ctdb_tunable, recovery_ban_period) },
        { "DatabaseHashSize", 10000,  offsetof(struct ctdb_tunable, database_hash_size) },
index 3d139c0c0f32b1e8aba17bc8a443d117c424772a..f4c362615c2432d81750079a747399152251ca25 100644 (file)
@@ -705,10 +705,15 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 
                ctdb->event_script_timeouts++;
                if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
-                       ctdb->event_script_timeouts = 0;
-                       DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
-                       ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
-                       callback(ctdb, -1, private_data);
+                       if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
+                               DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
+                               callback(ctdb, -ETIME, private_data);
+                       } else {
+                               ctdb->event_script_timeouts = 0;
+                               DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
+                               ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+                               callback(ctdb, -1, private_data);
+                       }
                } else {
                        callback(ctdb, 0, private_data);
                }