Add a tuneable so that when scripts starts to hang/timeout, we can make the node unhealthy instead of banned
uint32_t tickle_update_interval;
uint32_t script_timeout;
uint32_t script_ban_count; /* ban after this many consec timeouts*/
+ uint32_t script_unhealthy_on_timeout; /* don't ban on timeout; set node unhealthy */
uint32_t recovery_grace_period;
uint32_t recovery_ban_period;
uint32_t database_hash_size;
{ "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) },
{ "EventScriptTimeout", 60, offsetof(struct ctdb_tunable, script_timeout) },
{ "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) },
+ { "EventScriptUnhealthyOnTimeout", 0, offsetof(struct ctdb_tunable, script_unhealthy_on_timeout) },
{ "RecoveryGracePeriod", 120, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
ctdb->event_script_timeouts++;
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
- ctdb->event_script_timeouts = 0;
- DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
- ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
- callback(ctdb, -1, private_data);
+ if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
+ DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
+ callback(ctdb, -ETIME, private_data);
+ } else {
+ ctdb->event_script_timeouts = 0;
+ DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
+ ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+ callback(ctdb, -1, private_data);
+ }
} else {
callback(ctdb, 0, private_data);
}