return 0;
}
-static struct ctdb_public_ip_list *
-ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
- TALLOC_CTX *mem_ctx,
- struct ctdb_node_map_old *nodemap,
- uint32_t public_ip_flags)
-{
- int j, ret;
- struct ctdb_public_ip_list_old *ip_list;
- struct ctdb_public_ip_list *public_ips;
-
- public_ips = talloc_zero_array(mem_ctx,
- struct ctdb_public_ip_list,
- nodemap->num);
- if (public_ips == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
- return NULL;
- }
-
- for (j = 0; j < nodemap->num; j++) {
- if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
- continue;
- }
-
- /* Retrieve the list of public IPs from the
- * node. Flags says whether it is known or
- * available. */
- ret = ctdb_ctrl_get_public_ips_flags(
- ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
- public_ip_flags, &ip_list);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,
- ("Failed to read public IPs from node: %u\n", j));
- talloc_free(public_ips);
- return NULL;
- }
- public_ips[j].num = ip_list->num;
- if (ip_list->num == 0) {
- talloc_free(ip_list);
- continue;
- }
- public_ips[j].ip = talloc_zero_array(public_ips,
- struct ctdb_public_ip,
- ip_list->num);
- if (public_ips[j].ip == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
- talloc_free(public_ips);
- return NULL;
- }
- memcpy(public_ips[j].ip, &ip_list->ips[0],
- sizeof(struct ctdb_public_ip) * ip_list->num);
- talloc_free(ip_list);
- }
-
- return public_ips;
-}
-
-static struct ctdb_node_map *
-ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
- const struct ctdb_node_map_old *old)
-{
- struct ctdb_node_map *new;
-
- new = talloc(mem_ctx, struct ctdb_node_map);
- if (new == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
- return NULL;
- }
- new->num = old->num;
- new->node = talloc_zero_array(new,
- struct ctdb_node_and_flags, new->num);
- memcpy(new->node, &old->nodes[0],
- sizeof(struct ctdb_node_and_flags) * new->num);
-
- return new;
-}
-
-
-static bool set_ipflags(struct ctdb_context *ctdb,
- struct ipalloc_state *ipalloc_state,
- struct ctdb_node_map_old *nodemap)
-{
- struct ctdb_node_map *new;
-
- new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
- if (new == NULL) {
- return false;
- }
-
- ipalloc_set_node_flags(ipalloc_state, new);
-
- talloc_free(new);
-
- return true;
-}
-
-static enum ipalloc_algorithm
-determine_algorithm(const struct ctdb_tunable_list *tunables)
-{
- switch (tunables->ip_alloc_algorithm) {
- case 0:
- return IPALLOC_DETERMINISTIC;
- case 1:
- return IPALLOC_NONDETERMINISTIC;
- case 2:
- return IPALLOC_LCP2;
- default:
- return IPALLOC_LCP2;
- };
-}
-
-struct takeover_callback_data {
- uint32_t num_nodes;
- unsigned int *fail_count;
-};
-
-static struct takeover_callback_data *
-takeover_callback_data_init(TALLOC_CTX *mem_ctx,
- uint32_t num_nodes)
-{
- static struct takeover_callback_data *takeover_data;
-
- takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
- if (takeover_data == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
- return NULL;
- }
-
- takeover_data->fail_count = talloc_zero_array(takeover_data,
- unsigned int, num_nodes);
- if (takeover_data->fail_count == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
- talloc_free(takeover_data);
- return NULL;
- }
-
- takeover_data->num_nodes = num_nodes;
-
- return takeover_data;
-}
-
-static void takeover_run_fail_callback(struct ctdb_context *ctdb,
- uint32_t node_pnn, int32_t res,
- TDB_DATA outdata, void *callback_data)
-{
- struct takeover_callback_data *cd =
- talloc_get_type_abort(callback_data,
- struct takeover_callback_data);
-
- if (node_pnn >= cd->num_nodes) {
- DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
- return;
- }
-
- if (cd->fail_count[node_pnn] == 0) {
- DEBUG(DEBUG_ERR,
- ("Node %u failed the takeover run\n", node_pnn));
- }
-
- cd->fail_count[node_pnn]++;
-}
-
-static void takeover_run_process_failures(struct ctdb_context *ctdb,
- struct takeover_callback_data *tcd)
-{
- unsigned int max_fails = 0;
- uint32_t max_pnn = -1;
- uint32_t i;
-
- for (i = 0; i < tcd->num_nodes; i++) {
- if (tcd->fail_count[i] > max_fails) {
- max_pnn = i;
- max_fails = tcd->fail_count[i];
- }
- }
-
- if (max_fails > 0) {
- int ret;
- TDB_DATA data;
-
- DEBUG(DEBUG_ERR,
- ("Sending banning credits to %u with fail count %u\n",
- max_pnn, max_fails));
-
- data.dptr = (uint8_t *)&max_pnn;
- data.dsize = sizeof(uint32_t);
- ret = ctdb_client_send_message(ctdb,
- CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_BANNING,
- data);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,
- ("Failed to set banning credits for node %u\n",
- max_pnn));
- }
- }
-}
-
-/*
- * Recalculate the allocation of public IPs to nodes and have the
- * nodes host their allocated addresses.
- *
- * - Initialise IP allocation state. Pass:
- + algorithm to be used;
- + whether IP rebalancing ("failback") should be done (this uses a
- cluster-wide configuration variable and only the value form the
- master node is used); and
- * + list of nodes to force rebalance (internal structure, currently
- * no way to fetch, only used by LCP2 for nodes that have had new
- * IP addresses added).
- * - Set IP flags for IP allocation based on node map and tunables
- * NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
- * (tunable fetching done separately so values can be faked in unit
- * testing)
- * - Retrieve known and available IP addresses (done separately so
- * values can be faked in unit testing)
- * - Use ipalloc_set_public_ips() to set known and available IP
- addresses for allocation
- * - If cluster can't host IP addresses then early exit
- * - Run IP allocation algorithm
- * - Send RELEASE_IP to all nodes for IPs they should not host
- * - Send TAKE_IP to all nodes for IPs they should host
- * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
- */
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
- uint32_t *force_rebalance_nodes)
-{
- int i, ret;
- struct ctdb_public_ip ip;
- uint32_t *nodes;
- struct public_ip_list *all_ips, *tmp_ip;
- TDB_DATA data;
- struct timeval timeout;
- struct client_async_data *async_data;
- struct ctdb_client_control_state *state;
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ipalloc_state *ipalloc_state;
- struct ctdb_public_ip_list *known_ips, *available_ips;
- struct takeover_callback_data *takeover_data;
-
- /* Initialise fail callback data to be used with
- * takeover_run_fail_callback(). A failure in any of the
- * following steps will cause an early return, so this can be
- * reused for each of those steps without re-initialising. */
- takeover_data = takeover_callback_data_init(tmp_ctx,
- nodemap->num);
- if (takeover_data == NULL) {
- talloc_free(tmp_ctx);
- return -1;
- }
-
- /* Default timeout for early jump to IPREALLOCATED. See below
- * for explanation of 3 times... */
- timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
- /*
- * ip failover is completely disabled, just send out the
- * ipreallocated event.
- */
- if (ctdb->tunable.disable_ip_failover != 0) {
- goto ipreallocated;
- }
-
- ipalloc_state = ipalloc_state_init(
- tmp_ctx, ctdb->num_nodes,
- determine_algorithm(&ctdb->tunable),
- (ctdb->tunable.no_ip_takeover != 0),
- (ctdb->tunable.no_ip_failback != 0),
- (ctdb->tunable.no_ip_host_on_all_disabled != 0),
- force_rebalance_nodes);
- if (ipalloc_state == NULL) {
- talloc_free(tmp_ctx);
- return -1;
- }
-
- if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
- DEBUG(DEBUG_ERR,
- ("Failed to set IP flags - aborting takeover run\n"));
- talloc_free(tmp_ctx);
- return -1;
- }
-
- /* Fetch known/available public IPs from each active node */
- /* Fetch lists of known public IPs from all nodes */
- known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
- nodemap, 0);
- if (known_ips == NULL) {
- DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
- talloc_free(tmp_ctx);
- return -1;
- }
- available_ips = ctdb_fetch_remote_public_ips(
- ctdb, ipalloc_state, nodemap,
- CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
- if (available_ips == NULL) {
- DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
- talloc_free(tmp_ctx);
- return -1;
- }
-
- ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips);
-
- if (! ipalloc_can_host_ips(ipalloc_state)) {
- DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
- goto ipreallocated;
- }
-
- /* Do the IP reassignment calculations */
- all_ips = ipalloc(ipalloc_state);
- if (all_ips == NULL) {
- talloc_free(tmp_ctx);
- return -1;
- }
-
- /* Now tell all nodes to release any public IPs should not
- * host. This will be a NOOP on nodes that don't currently
- * hold the given IP.
- */
- async_data = talloc_zero(tmp_ctx, struct client_async_data);
- CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
- async_data->fail_callback = takeover_run_fail_callback;
- async_data->callback_data = takeover_data;
-
- ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
-
- /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
- * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
- * seconds. However, RELEASE_IP can take longer due to TCP
- * connection killing, so sometimes needs more time.
- * Therefore, use a cumulative timeout of TakeoverTimeout * 3
- * seconds across all 3 stages. No explicit expiry checks are
- * needed before each stage because tevent is smart enough to
- * fire the timeouts even if they are in the past. Initialise
- * this here so it explicitly covers the stages we're
- * interested in but, in particular, not the time taken by the
- * ipalloc().
- */
- timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
- /* Send a RELEASE_IP to all nodes that should not be hosting
- * each IP. For each IP, all but one of these will be
- * redundant. However, the redundant ones are used to tell
- * nodes which node should be hosting the IP so that commands
- * like "ctdb ip" can display a particular nodes idea of who
- * is hosting what. */
- for (i=0;i<nodemap->num;i++) {
- /* don't talk to unconnected nodes, but do talk to banned nodes */
- if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
- continue;
- }
-
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
- /* This node should be serving this
- vnn so don't tell it to release the ip
- */
- continue;
- }
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
-
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
- 0, CTDB_CONTROL_RELEASE_IP, 0,
- data, async_data,
- &timeout, NULL);
- if (state == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
- talloc_free(tmp_ctx);
- return -1;
- }
-
- ctdb_client_async_add(async_data, state);
- }
- }
- if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,
- ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
- goto fail;
- }
- talloc_free(async_data);
-
-
- /* For each IP, send a TAKOVER_IP to the node that should be
- * hosting it. Many of these will often be redundant (since
- * the allocation won't have changed) but they can be useful
- * to recover from inconsistencies. */
- async_data = talloc_zero(tmp_ctx, struct client_async_data);
- CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
- async_data->fail_callback = takeover_run_fail_callback;
- async_data->callback_data = takeover_data;
-
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- /* this IP won't be taken over */
- continue;
- }
-
- ip.pnn = tmp_ip->pnn;
- ip.addr = tmp_ip->addr;
-
- data.dsize = sizeof(ip);
- data.dptr = (uint8_t *)&ip;
- state = ctdb_control_send(ctdb, tmp_ip->pnn,
- 0, CTDB_CONTROL_TAKEOVER_IP, 0,
- data, async_data, &timeout, NULL);
- if (state == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
- talloc_free(tmp_ctx);
- return -1;
- }
-
- ctdb_client_async_add(async_data, state);
- }
- if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,
- ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
- goto fail;
- }
-
-ipreallocated:
- /*
- * Tell all nodes to run eventscripts to process the
- * "ipreallocated" event. This can do a lot of things,
- * including restarting services to reconfigure them if public
- * IPs have moved. Once upon a time this event only used to
- * update natgw.
- */
- nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
- ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
- nodes, 0, timeout,
- false, tdb_null,
- NULL, takeover_run_fail_callback,
- takeover_data);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,
- ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
- goto fail;
- }
-
- talloc_free(tmp_ctx);
- return ret;
-
-fail:
- takeover_run_process_failures(ctdb, takeover_data);
- talloc_free(tmp_ctx);
- return -1;
-}
-
-
/*
destroy a ctdb_client_ip structure
*/