uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
uint32_t *recd_ping_count;
+ TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
};
struct ctdb_db_context {
void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
+int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata);
extern int script_log_level;
}
case CTDB_CONTROL_SET_RECMASTER: {
- CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
- if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
- DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n"));
- return -1;
- }
- ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
- return 0;
+ return ctdb_control_set_recmaster(ctdb, opcode, indata);
}
case CTDB_CONTROL_GET_RECMASTER:
return;
}
+/*
+  timed event handler, armed on ctdb->release_ips_ctx when the recovery
+  mode is set to something other than CTDB_RECOVERY_NORMAL.
+  If it fires, the node has been in recovery mode for too long: drop the
+  timer context and release all ip addresses.
+ */
+static void ctdb_drop_all_ips_event(struct event_context *ev,
+				     struct timed_event *te,
+				     struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb;
+
+	ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	DEBUG(DEBUG_INFO,(__location__ " Been in recovery mode for too long. Dropping all IPS\n"));
+
+	/* the timer has fired, so the context that held it is no longer needed */
+	talloc_free(ctdb->release_ips_ctx);
+	ctdb->release_ips_ctx = NULL;
+
+	ctdb_release_all_ips(ctdb);
+}
+
/*
set the recovery mode
*/
struct ctdb_set_recmode_state *state;
pid_t parent = getpid();
+ /* If we enter recovery mode and stay in it for too long (the timer
+    armed below fires after 5 seconds), we drop all our IP addresses.
+  */
+ if (recmode == CTDB_RECOVERY_NORMAL) {
+ talloc_free(ctdb->release_ips_ctx);
+ ctdb->release_ips_ctx = NULL;
+ } else {
+ talloc_free(ctdb->release_ips_ctx);
+ ctdb->release_ips_ctx = talloc_new(ctdb);
+ CTDB_NO_MEMORY(ctdb, ctdb->release_ips_ctx);
+
+ event_add_timed(ctdb->ev, ctdb->release_ips_ctx, timeval_current_ofs(5,0), ctdb_drop_all_ips_event, ctdb);
+ }
+
+
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(DEBUG_ERR,("Attempt to change recovery mode to %u when not frozen\n",
recmode));
return 0;
}
+
+
+/*
+  set the recovery master
+  indata carries the new recovery master as a single uint32_t
+  (CHECK_CONTROL_DATA_SIZE presumably rejects any other payload size -
+  confirm against the macro definition).
+  returns 0 on success, -1 if the node is not frozen.
+ */
+int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata)
+{
+	CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+
+	if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+		ctdb->recovery_master = *(uint32_t *)indata.dptr;
+		return 0;
+	}
+
+	/* the recovery master may only be changed while frozen */
+	DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n"));
+	return -1;
+}
if (!ctdb_sys_have_ip(&vnn->public_address)) {
continue;
}
+ if (vnn->pnn == ctdb->pnn) {
+ vnn->pnn = -1;
+ }
ctdb_event_script(ctdb, "releaseip %s %s %u",
vnn->iface,
talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
- const char *allowed_scripts[] = {"startrecovery", "shutdown" };
+ const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip" };
int i;
for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
- if (strcmp(options, allowed_scripts[i]) == 0) break;
+ if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
}
if (i == ARRAY_SIZE(allowed_scripts)) {
- DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n",
+ DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n",
options));
return -1;
}
static int control_recmaster(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
- uint32_t recmode, recmaster;
- int mypnn;
+ uint32_t recmaster;
ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.pnn, &recmaster);
if (ret != 0) {