#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
--static int timed_out = 0;
--
--static void timeout_func(struct event_context *ev, struct timed_event *te,
-- struct timeval t, void *private_data)
--{
-- timed_out = 1;
--}
++/*
++ private state of recovery daemon
++
++ One instance is allocated with talloc_zero() by the monitoring loop
++ and passed as private data to the election and node-flags-changed
++ message handlers.
++ */
++struct ctdb_recoverd {
++ struct ctdb_context *ctdb; /* ctdb context this recovery daemon works on */
++ TALLOC_CTX *mem_ctx; /* parent of ban_state allocations; freed and recreated by the election handler */
++ uint32_t last_culprit; /* node blamed for the most recent recovery; (uint32_t)-1 after an election reset */
++ uint32_t culprit_counter; /* recoveries attributed to last_culprit since first_recover_time */
++ struct timeval first_recover_time; /* when counting for the current culprit was (re)started */
++ bool *banned_nodes; /* ctdb->num_nodes entries, indexed by vnn; true while that node is banned */
++};
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
#define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
++/*
++ change recovery mode on all nodes
++ */
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
{
int j, ret;
/* set recovery mode to active on all nodes */
for (j=0; j<nodemap->num; j++) {
/* dont change it for nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
return 0;
}
++/*
++ change recovery master on all nodes
++ */
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
{
int j, ret;
/* set recovery master to vnn on all nodes */
for (j=0; j<nodemap->num; j++) {
/* dont change it for nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
return 0;
}
--static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
++
++/*
++ ensure all other nodes have attached to any databases that we have
++ */
++static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
++ uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, db, ret;
struct ctdb_dbid_map *remote_dbmap;
continue;
}
/* dont check nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
}
--static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
++/*
++ ensure we are attached to any databases that anyone else is attached to
++ */
++static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
++ uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, db, ret;
struct ctdb_dbid_map *remote_dbmap;
continue;
}
/* dont check nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
}
--static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
++/*
++ pull all the remote database contents into ours
++ */
++static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
++ uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, ret;
continue;
}
/* dont merge from nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
}
--
++/*
++ change the dmaster on all databases to point to us
++ */
static int update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
for (i=0;i<dbmap->num;i++) {
for (j=0; j<nodemap->num; j++) {
/* dont repoint nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], vnn);
return 0;
}
++
++/*
++ tell the cluster which flags we hold for each node
++
++ For every nodemap entry that does not have NODE_FLAGS_DISCONNECTED
++ set, a ctdb_node_flag_change carrying that node's vnn and flags is
++ sent to CTDB_BROADCAST_VNNMAP on CTDB_SRVID_NODE_FLAGS_CHANGED.
++ The result of ctdb_send_message is not examined, so this function
++ always returns 0.
++ */
++static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
++{
++	int n;
++
++	for (n = 0; n < nodemap->num; n++) {
++		uint32_t node_flags = nodemap->nodes[n].flags;
++
++		if (!(node_flags & NODE_FLAGS_DISCONNECTED)) {
++			struct ctdb_node_flag_change change;
++			TDB_DATA msg;
++
++			change.vnn = nodemap->nodes[n].vnn;
++			change.flags = node_flags;
++
++			msg.dptr = (uint8_t *)&change;
++			msg.dsize = sizeof(change);
++
++			ctdb_send_message(ctdb, CTDB_BROADCAST_VNNMAP,
++					  CTDB_SRVID_NODE_FLAGS_CHANGED, msg);
++		}
++	}
++
++	return 0;
++}
++
/*
vacuum one database
*/
/* set rsn on non-empty records to max_rsn+1 */
for (i=0;i<nodemap->num;i++) {
-- if (!nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
++ if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_set_rsn_nonempty(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
/* delete records with rsn < max_rsn+1 on all nodes */
for (i=0;i<nodemap->num;i++) {
-- if (!nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
++ if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_delete_low_rsn(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn,
}
++/*
++ vacuum all attached databases
++ */
static int vacuum_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
struct ctdb_dbid_map *dbmap)
{
}
++/*
++ push out all our database contents to all other nodes
++ */
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
continue;
}
/* dont push to nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
}
++/*
++ ensure all nodes have the same vnnmap we do
++ */
static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
uint32_t vnn, struct ctdb_vnn_map *vnnmap, TALLOC_CTX *mem_ctx)
{
/* push the new vnn map out to all the nodes */
for (j=0; j<nodemap->num; j++) {
/* dont push to nodes that are unavailable */
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
return 0;
}
++
++struct ban_state { /* context handed to ctdb_ban_timeout when a ban expires */
++ struct ctdb_recoverd *rec; /* recovery daemon state that owns banned_nodes[] */
++ uint32_t banned_node; /* index into rec->banned_nodes of the node to unban */
++};
++
++/*
++ called when a ban has timed out
++
++ p is the struct ban_state that was registered with the timer. The
++ node's entry in the recovery daemon's banned_nodes array is cleared
++ and the ban state is freed.
++ */
++static void ctdb_ban_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
++{
++	struct ban_state *state = talloc_get_type(p, struct ban_state);
++
++	DEBUG(0,("Node %u is now unbanned\n", state->banned_node));
++
++	state->rec->banned_nodes[state->banned_node] = false;
++	talloc_free(state);
++}
++
++
/*
we are the recmaster, and recovery is needed - start a recovery run
*/
--static int do_recovery(struct ctdb_context *ctdb,
++static int do_recovery(struct ctdb_recoverd *rec,
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
-- struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
++ struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap,
++ uint32_t culprit)
{
++ struct ctdb_context *ctdb = rec->ctdb;
int i, j, ret;
uint32_t generation;
struct ctdb_dbid_map *dbmap;
++ if (rec->last_culprit != culprit ||
++ timeval_elapsed(&rec->first_recover_time) > ctdb->tunable.recovery_grace_period) {
++ /* either a new node is the culprit, or we've decided to forgive them */
++ rec->last_culprit = culprit;
++ rec->first_recover_time = timeval_current();
++ rec->culprit_counter = 0;
++ }
++ rec->culprit_counter++;
++
++ if (rec->culprit_counter > 2*nodemap->num) {
++ struct ban_state *state;
++
++ DEBUG(0,("Node %u has caused %u recoveries in %.0f seconds - banning it for %u seconds\n",
++ culprit, rec->culprit_counter, timeval_elapsed(&rec->first_recover_time),
++ ctdb->tunable.recovery_ban_period));
++ ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), culprit, NODE_FLAGS_BANNED, 0);
++ rec->banned_nodes[culprit] = true;
++
++ state = talloc(rec->mem_ctx, struct ban_state);
++ CTDB_NO_MEMORY_FATAL(ctdb, state);
++
++ state->rec = rec;
++ state->banned_node = culprit;
++
++ event_add_timed(ctdb->ev, state, timeval_current_ofs(ctdb->tunable.recovery_ban_period, 0),
++ ctdb_ban_timeout, state);
++ }
++
if (!ctdb_recovery_lock(ctdb, true)) {
DEBUG(0,("Unable to get recovery lock - aborting recovery\n"));
return -1;
return -1;
}
-- DEBUG(0, (__location__ " Recovery initiated\n"));
++ DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
/* pick a new generation number */
generation = random();
-- /* build a new vnn map with all the currently active nodes */
++ /* build a new vnn map with all the currently active and
++ unbanned nodes */
generation = random();
vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
CTDB_NO_MEMORY(ctdb, vnnmap);
vnnmap->size = num_active;
vnnmap->map = talloc_array(vnnmap, uint32_t, vnnmap->size);
for (i=j=0;i<nodemap->num;i++) {
-- if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
++ if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
vnnmap->map[j++] = nodemap->nodes[i].vnn;
}
}
return -1;
}
++ /*
++ update all nodes to have the same flags that we have
++ */
++ ret = update_flags_on_all_nodes(ctdb, nodemap);
++ if (ret != 0) {
++ DEBUG(0, (__location__ " Unable to update flags on all nodes\n"));
++ return -1;
++ }
++
/*
run a vacuum operation on empty records
*/
uint32_t vnn;
};
++
++/*
++ send out an election request
++ */
static int send_election_request(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn)
{
int ret;
handler for recovery master elections
*/
static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
-- TDB_DATA data, void *private_data)
++ TDB_DATA data, void *private_data)
{
++ struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
int ret;
struct election_message *em = (struct election_message *)data.dptr;
TALLOC_CTX *mem_ctx;
return;
}
++ /* release any ban information: ban_state objects are allocated under
++    rec->mem_ctx, so freeing it drops every pending ban */
++ talloc_free(rec->mem_ctx);
++ rec->mem_ctx = talloc_new(rec);
++ /* check the context that was just allocated, reporting via ctdb */
++ CTDB_NO_MEMORY_FATAL(ctdb, rec->mem_ctx);
++
++ rec->last_culprit = (uint32_t)-1;
++ talloc_free(rec->banned_nodes);
++ rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
++ CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
++
talloc_free(mem_ctx);
return;
}
++/*
++ timer callback used by ctdb_wait_timeout
++
++ p points at the uint32_t flag the waiter is polling; it is set to 1
++ to signal that the wait is over.
++ */
++static void ctdb_wait_handler(struct event_context *ev, struct timed_event *te,
++			      struct timeval yt, void *p)
++{
++	*(uint32_t *)p = 1;
++}
++
++/*
++ wait for a given number of seconds
++
++ Registers a one-shot timer on ctdb->ev whose callback sets a local
++ flag, then runs the event loop until that flag is set.
++ */
++static void ctdb_wait_timeout(struct ctdb_context *ctdb, uint32_t secs)
++{
++	uint32_t timed_out = 0;
++	struct timed_event *te;
++
++	te = event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(secs, 0),
++			     ctdb_wait_handler, &timed_out);
++	/* without a timer nothing would ever set timed_out and the loop
++	   below would never terminate */
++	CTDB_NO_MEMORY_FATAL(ctdb, te);
++
++	while (!timed_out) {
++		event_loop_once(ctdb->ev);
++	}
++}
++
++/*
++ force the start of the election process
++ */
static void force_election(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn, struct ctdb_node_map *nodemap)
{
int ret;
}
/* wait for a few seconds to collect all responses */
-- timed_out = 0;
-- event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(ctdb->tunable.election_timeout, 0),
-- timeout_func, ctdb);
-- while (!timed_out) {
-- event_loop_once(ctdb->ev);
++ ctdb_wait_timeout(ctdb, ctdb->tunable.election_timeout);
++}
++
++
++
++/*
++ handler for when a node changes its flags
++
++ data must hold exactly one struct ctdb_node_flag_change. The local
++ nodemap is fetched and the entry for the announced node gets its
++ flags replaced by the announced ones. If this node is the recovery
++ master, the recovery mode is CTDB_RECOVERY_NORMAL and takeover is
++ enabled, a takeover run is then performed with the updated nodemap.
++*/
++static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
++			    TDB_DATA data, void *private_data)
++{
++	int ret;
++	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
++	struct ctdb_node_map *nodemap=NULL;
++	TALLOC_CTX *tmp_ctx;
++	int i;
++
++	if (data.dsize != sizeof(*c)) {
++		DEBUG(0,(__location__ "Invalid data in ctdb_node_flag_change\n"));
++		return;
++	}
++
++	tmp_ctx = talloc_new(ctdb);
++	CTDB_NO_MEMORY_VOID(ctdb, tmp_ctx);
++
++	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
++	if (ret != 0 || nodemap == NULL) {
++		/* nodemap is not valid on failure - it must not be dereferenced */
++		DEBUG(0,(__location__ " Unable to get nodemap from local node\n"));
++		talloc_free(tmp_ctx);
++		return;
++	}
++
++	/* locate the nodemap entry the message is about */
++	for (i=0;i<nodemap->num;i++) {
++		if (nodemap->nodes[i].vnn == c->vnn) break;
++	}
++
++	if (i == nodemap->num) {
++		DEBUG(0,(__location__ "Flag change for non-existant node %u\n", c->vnn));
++		talloc_free(tmp_ctx);
++		return;
++	}
++
++	if (nodemap->nodes[i].flags != c->flags) {
++		DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
++	}
++
++	/* only this temporary copy of the nodemap is updated */
++	nodemap->nodes[i].flags = c->flags;
++
++	ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(),
++				     CTDB_CURRENT_NODE, &ctdb->recovery_master);
++
++	if (ret == 0) {
++		ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(),
++					   CTDB_CURRENT_NODE, &ctdb->recovery_mode);
}
++
++	if (ret == 0 &&
++	    ctdb->recovery_master == ctdb->vnn &&
++	    ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&
++	    ctdb->takeover.enabled) {
++		ret = ctdb_takeover_run(ctdb, nodemap);
++		if (ret != 0) {
++			DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
++		}
++	}
++
++	talloc_free(tmp_ctx);
}
++
++
/*
the main monitoring loop
*/
struct ctdb_vnn_map *remote_vnnmap=NULL;
int i, j, ret;
bool need_takeover_run;
++ struct ctdb_recoverd *rec;
++
++ rec = talloc_zero(ctdb, struct ctdb_recoverd);
++ CTDB_NO_MEMORY_FATAL(ctdb, rec);
++
++ rec->ctdb = ctdb;
++ rec->banned_nodes = talloc_zero_array(rec, bool, ctdb->num_nodes);
++ CTDB_NO_MEMORY_FATAL(ctdb, rec->banned_nodes);
++
++ rec->mem_ctx = talloc_new(rec);
++ CTDB_NO_MEMORY_FATAL(ctdb, rec->mem_ctx);
++
++ /* register a message port for recovery elections */
++ ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
++
++ /* and one for when nodes are disabled/enabled */
++ ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);
again:
need_takeover_run = false;
}
/* we only check for recovery once every second */
-- timed_out = 0;
-- event_add_timed(ctdb->ev, mem_ctx, MONITOR_TIMEOUT(), timeout_func, ctdb);
-- while (!timed_out) {
-- event_loop_once(ctdb->ev);
-- }
++ ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval);
/* get relevant tunables */
ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
"ElectionTimeout", &ctdb->tunable.election_timeout);
ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
"TakeoverTimeout", &ctdb->tunable.takeover_timeout);
++ ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
++ "RecoveryGracePeriod", &ctdb->tunable.recovery_grace_period);
++ ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
++ "RecoveryBanPeriod", &ctdb->tunable.recovery_ban_period);
vnn = ctdb_ctrl_getvnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (vnn == (uint32_t)-1) {
goto again;
}
-- ctdb->vnn = vnn;
--
/* get the vnnmap */
ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, &vnnmap);
if (ret != 0) {
/* count how many active nodes there are */
num_active = 0;
for (i=0; i<nodemap->num; i++) {
-- if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
++ if (rec->banned_nodes[nodemap->nodes[i].vnn]) {
++ nodemap->nodes[i].flags |= NODE_FLAGS_BANNED;
++ } else {
++ nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED;
++ }
++ if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
num_active++;
}
}
goto again;
}
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn));
force_election(ctdb, mem_ctx, vnn, nodemap);
goto again;
/* verify that all active nodes agree that we are the recmaster */
for (j=0; j<nodemap->num; j++) {
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
if (nodemap->nodes[j].vnn == vnn) {
and not in recovery mode
*/
for (j=0; j<nodemap->num; j++) {
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
DEBUG(0, ("Unable to get recmode from node %u\n", vnn));
goto again;
}
-- if (recmode!=CTDB_RECOVERY_NORMAL) {
++ if (recmode != CTDB_RECOVERY_NORMAL) {
DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
}
they are the same as for this node
*/
for (j=0; j<nodemap->num; j++) {
-- if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
if (nodemap->nodes[j].vnn == vnn) {
then this is a good reason to try recovery
*/
if (remote_nodemap->num != nodemap->num) {
-- DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
++ nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
*/
for (i=0;i<nodemap->num;i++) {
if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn)
-- || ((remote_nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) !=
-- (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED))) {
-- DEBUG(0, (__location__ " Remote node:%u has different nodemap.\n", nodemap->nodes[j].vnn));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ || ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
++ (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE))) {
++ DEBUG(0, (__location__ " Remote node:%u has different nodemap.\n",
++ nodemap->nodes[j].vnn));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
}
as there are active nodes or we will have to do a recovery
*/
if (vnnmap->size != num_active) {
-- DEBUG(0, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n", vnnmap->size, num_active));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ DEBUG(0, (__location__ " The vnnmap count is different from the number of active nodes. %u vs %u\n",
++ vnnmap->size, num_active));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, ctdb->vnn);
goto again;
}
the vnnmap.
*/
for (j=0; j<nodemap->num; j++) {
-- if (!(nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
if (nodemap->nodes[j].vnn == vnn) {
break;
}
}
-- if (i==vnnmap->size) {
-- DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ if (i == vnnmap->size) {
++ DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n",
++ nodemap->nodes[j].vnn));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
}
and are from the same generation
*/
for (j=0; j<nodemap->num; j++) {
-- if (!(nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED)) {
++ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
if (nodemap->nodes[j].vnn == vnn) {
/* verify the vnnmap generation is the same */
if (vnnmap->generation != remote_vnnmap->generation) {
-- DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n",
++ nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
/* verify the vnnmap size is the same */
if (vnnmap->size != remote_vnnmap->size) {
-- DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. %u vs %u (ours)\n",
++ nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
/* verify the vnnmap is the same */
for (i=0;i<vnnmap->size;i++) {
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
-- DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n", nodemap->nodes[j].vnn));
-- do_recovery(ctdb, mem_ctx, vnn, num_active, nodemap, vnnmap);
++ DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n",
++ nodemap->nodes[j].vnn));
++ do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
}
}
}
--
/*
-- handler for when a node changes its flags
--*/
--static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
-- TDB_DATA data, void *private_data)
--{
-- int ret;
-- struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
-- struct ctdb_node_map *nodemap=NULL;
-- TALLOC_CTX *tmp_ctx;
-- int i;
--
-- if (data.dsize != sizeof(*c)) {
-- DEBUG(0,(__location__ "Invalid data in ctdb_node_flag_change\n"));
-- return;
-- }
--
-- tmp_ctx = talloc_new(ctdb);
-- CTDB_NO_MEMORY_VOID(ctdb, tmp_ctx);
--
-- ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
--
-- for (i=0;i<nodemap->num;i++) {
-- if (nodemap->nodes[i].vnn == c->vnn) break;
-- }
--
-- if (i == nodemap->num) {
-- DEBUG(0,(__location__ "Flag change for non-existant node %u\n", c->vnn));
-- talloc_free(tmp_ctx);
-- return;
-- }
--
-- if (c->vnn != ctdb->vnn) {
-- DEBUG(0,("Node %u has changed flags - now 0x%x\n", c->vnn, c->flags));
-- }
--
-- nodemap->nodes[i].flags = c->flags;
--
-- ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(),
-- CTDB_CURRENT_NODE, &ctdb->recovery_master);
--
-- if (ret == 0) {
-- ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(),
-- CTDB_CURRENT_NODE, &ctdb->recovery_mode);
-- }
--
-- if (ret == 0 &&
-- ctdb->recovery_master == ctdb->vnn &&
-- ctdb->recovery_mode == CTDB_RECOVERY_NORMAL &&
-- ctdb->takeover.enabled) {
-- ret = ctdb_takeover_run(ctdb, nodemap);
-- if (ret != 0) {
-- DEBUG(0, (__location__ " Unable to setup public takeover addresses\n"));
-- }
-- }
--
-- talloc_free(tmp_ctx);
--}
--
--
++ event handler for when the main ctdbd dies
++ */
static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
_exit(1);
}
++
++
++/*
++ startup the recovery daemon as a child of the main ctdb daemon
++ */
int ctdb_start_recoverd(struct ctdb_context *ctdb)
{
int ret;
exit(1);
}
-- /* register a message port for recovery elections */
-- ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, NULL);
--
-- /* and one for when nodes are disabled/enabled */
-- ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, NULL);
--
monitor_cluster(ctdb);
DEBUG(0,("ERROR: ctdb_recoverd finished!?\n"));