TDB_DATA data;
if (status != 0) {
- struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
-
if (status == -ETIME) {
ctdb_ban_self(ctdb);
}
ctdb_vnn_iface_string(state->vnn)));
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
- node->flags |= NODE_FLAGS_UNHEALTHY;
talloc_free(state);
return;
}
TALLOC_FREE(ctdb->ip_tree);
ctdb->ip_tree = trbt_create(ctdb, 0);
- for (i=0; i < ctdb->num_nodes; i++) {
-
- if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
- continue;
- }
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
+ return NULL;
+ }
- /* there were no public ips for this node */
- if (ipalloc_state->known_public_ips == NULL) {
- continue;
- }
+ for (i=0; i < ipalloc_state->num; i++) {
public_ips = &ipalloc_state->known_public_ips[i];
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
/* Do not use information about IP addresses hosted
* on other nodes, it may not be accurate */
- if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
+ if (public_ips->ip[j].pnn == i) {
tmp_ip->pnn = public_ips->ip[j].pnn;
} else {
tmp_ip->pnn = -1;
/* Per-run bookkeeping shared by the takeover-run async failure callbacks. */
struct takeover_callback_data {
uint32_t num_nodes; /* number of entries allocated in fail_count */
- bool *node_failed;
- client_async_callback fail_callback;
- void *fail_callback_data;
+ unsigned int *fail_count; /* failures recorded per node, indexed by node PNN */
};
static struct takeover_callback_data *
takeover_callback_data_init(TALLOC_CTX *mem_ctx,
- uint32_t num_nodes,
- client_async_callback fail_callback,
- void *callback_data)
+ uint32_t num_nodes)
{
static struct takeover_callback_data *takeover_data;
return NULL;
}
- takeover_data->node_failed = talloc_zero_array(takeover_data,
- bool, num_nodes);
- if (takeover_data->node_failed == NULL) {
+ takeover_data->fail_count = talloc_zero_array(takeover_data,
+ unsigned int, num_nodes);
+ if (takeover_data->fail_count == NULL) {
DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
talloc_free(takeover_data);
return NULL;
}
takeover_data->num_nodes = num_nodes;
- takeover_data->fail_callback = fail_callback;
- takeover_data->fail_callback_data = callback_data;
return takeover_data;
}
return;
}
- if (!cd->node_failed[node_pnn]) {
+ if (cd->fail_count[node_pnn] == 0) {
DEBUG(DEBUG_ERR,
("Node %u failed the takeover run\n", node_pnn));
- cd->node_failed[node_pnn] = true;
- cd->fail_callback(ctdb, node_pnn, res, outdata,
- cd->fail_callback_data);
+ }
+
+ cd->fail_count[node_pnn]++;
+}
+/*
+ * Find the node with the highest failure count recorded in tcd and send
+ * its PNN as a CTDB_SRVID_BANNING message to CTDB_BROADCAST_CONNECTED.
+ * Nothing is sent when every fail_count entry is zero.  A failed send is
+ * only logged; this function returns nothing to the caller.
+ */
+static void takeover_run_process_failures(struct ctdb_context *ctdb,
+ struct takeover_callback_data *tcd)
+{
+ unsigned int max_fails = 0;
+ uint32_t max_pnn = -1; /* placeholder; only read once max_fails > 0 */
+ uint32_t i;
+
+ for (i = 0; i < tcd->num_nodes; i++) {
+ if (tcd->fail_count[i] > max_fails) { /* strict '>': lowest index wins a tie */
+ max_pnn = i;
+ max_fails = tcd->fail_count[i];
+ }
+ }
+
+ if (max_fails > 0) { /* at least one node recorded a failure */
+ int ret;
+ TDB_DATA data;
+
+ DEBUG(DEBUG_ERR,
+ ("Sending banning credits to %u with fail count %u\n",
+ max_pnn, max_fails));
+
+ data.dptr = (uint8_t *)&max_pnn; /* message payload is the raw uint32_t PNN */
+ data.dsize = sizeof(uint32_t);
+ ret = ctdb_client_send_message(ctdb,
+ CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_BANNING,
+ data);
+ if (ret != 0) { /* send failure is logged, not propagated */
+ DEBUG(DEBUG_ERR,
+ ("Failed to set banning credits for node %u\n",
+ max_pnn));
+ }
}
}
* - Send IPREALLOCATED to all nodes (with backward compatibility hack)
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
- uint32_t *force_rebalance_nodes,
- client_async_callback fail_callback, void *callback_data)
+ uint32_t *force_rebalance_nodes)
{
int i, ret;
struct ctdb_public_ip ip;
* following steps will cause an early return, so this can be
* reused for each of those steps without re-initialising. */
takeover_data = takeover_callback_data_init(tmp_ctx,
- nodemap->num,
- fail_callback,
- callback_data);
+ nodemap->num);
if (takeover_data == NULL) {
talloc_free(tmp_ctx);
return -1;
}
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
- talloc_free(tmp_ctx);
- return -1;
+ DEBUG(DEBUG_ERR,
+ ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
+ goto fail;
}
talloc_free(async_data);
ctdb_client_async_add(async_data, state);
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
- talloc_free(tmp_ctx);
- return -1;
+ DEBUG(DEBUG_ERR,
+ ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
+ goto fail;
}
ipreallocated:
if (ret != 0) {
DEBUG(DEBUG_ERR,
("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
+ goto fail;
}
talloc_free(tmp_ctx);
return ret;
+
+fail:
+ takeover_run_process_failures(ctdb, takeover_data);
+ talloc_free(tmp_ctx);
+ return -1;
}