#include "lib/util/dlinklist.h"
#include "lib/util/debug.h"
#include "lib/util/samba_util.h"
+#include "lib/util/util_process.h"
#include "ctdb_private.h"
#include "ctdb_client.h"
-#include "ctdb_logging.h"
#include "common/rb_tree.h"
#include "common/reqid.h"
#include "common/system.h"
#include "common/common.h"
+#include "common/logging.h"
+#include "server/ipalloc.h"
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT 3
-/* Flags used in IP allocation algorithms. */
-struct ctdb_ipflags {
- bool noiptakeover;
- bool noiphost;
- enum ctdb_runstate runstate;
-};
-
struct ctdb_interface {
struct ctdb_interface *prev, *next;
const char *name;
{
struct ctdb_interface *i;
- /* Verify that we dont have an entry for this ip yet */
+ if (strlen(iface) > CTDB_IFACE_SIZE) {
+ DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
+ return -1;
+ }
+
+ /* Verify that we don't have an entry for this ip yet */
for (i=ctdb->ifaces;i;i=i->next) {
if (strcmp(i->name, iface) == 0) {
return 0;
* foolproof. One alternative is reference counting, where the logic
* is distributed and can, therefore, be broken in multiple places.
* Another alternative is to build a red-black tree of interfaces that
- * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
- * once) and then walking ctdb->ifaces once and deleting those not in
- * the tree. Let's go to one of those if the naive implementation
- * causes problems... :-)
+ * can have addresses (by walking ctdb->vnn once) and then walking
+ * ctdb->ifaces once and deleting those not in the tree. Let's go to
+ * one of those if the naive implementation causes problems... :-)
*/
static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
struct ctdb_vnn *vnn)
continue;
}
- /* Is the "single IP" on this interface? */
- if ((ctdb->single_ip_vnn != NULL) &&
- (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
- (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
- /* Found, next interface please... */
- continue;
- }
/* Search for a vnn with this interface. */
found = false;
for (tv=ctdb->vnn; tv; tv=tv->next) {
{
int i;
+ /* Nodes that are not RUNNING can not host IPs */
+ if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
+ return false;
+ }
+
if (vnn->delete_pending) {
return false;
}
TDB_DATA data;
if (status != 0) {
- struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
-
if (status == -ETIME) {
ctdb_ban_self(ctdb);
}
ctdb_vnn_iface_string(state->vnn)));
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
- node->flags |= NODE_FLAGS_UNHEALTHY;
talloc_free(state);
return;
}
return 0;
}
-/*
- kill any clients that are registered with a IP that is being released
- */
-static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
-{
- struct ctdb_client_ip *ip;
-
- DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
- ctdb_addr_to_str(addr)));
-
- for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
- ctdb_sock_addr tmp_addr;
-
- tmp_addr = ip->addr;
- DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
- ip->client_id,
- ctdb_addr_to_str(&ip->addr)));
-
- if (ctdb_same_ip(&tmp_addr, addr)) {
- struct ctdb_client *client = reqid_find(ctdb->idr,
- ip->client_id,
- struct ctdb_client);
- DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
- ip->client_id,
- ctdb_addr_to_str(&ip->addr),
- client->pid));
-
- if (client->pid != 0) {
- DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
- (unsigned)client->pid,
- ctdb_addr_to_str(addr),
- ip->client_id));
- kill(client->pid, SIGKILL);
- }
- }
- }
-}
-
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
DLIST_REMOVE(ctdb->vnn, vnn);
ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
- /* kill clients that have registered with this IP */
- release_kill_clients(ctdb, state->addr);
-
ctdb_vnn_unassign_iface(ctdb, state->vnn);
/* Process the IP if it has been marked for deletion */
talloc_free(vnn->takeover_ctx);
vnn->takeover_ctx = NULL;
- /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
+ /* Some ctdb tool commands (e.g. moveip) send
* lazy multicast to drop an IP from any node that isn't the
* intended new node. The following causes makes ctdbd ignore
* a release for any address it doesn't host.
}
free(tmp);
- /* Verify that we dont have an entry for this ip yet */
+ /* Verify that we don't have an entry for this ip yet */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
return 0;
}
-int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
- const char *iface,
- const char *ip)
-{
- struct ctdb_vnn *svnn;
- struct ctdb_interface *cur = NULL;
- bool ok;
- int ret;
-
- svnn = talloc_zero(ctdb, struct ctdb_vnn);
- CTDB_NO_MEMORY(ctdb, svnn);
-
- svnn->ifaces = talloc_array(svnn, const char *, 2);
- CTDB_NO_MEMORY(ctdb, svnn->ifaces);
- svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
- CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
- svnn->ifaces[1] = NULL;
-
- ok = parse_ip(ip, iface, 0, &svnn->public_address);
- if (!ok) {
- talloc_free(svnn);
- return -1;
- }
-
- ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
- if (ret != 0) {
- DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
- "for single_ip[%s]\n",
- svnn->ifaces[0],
- ctdb_addr_to_str(&svnn->public_address)));
- talloc_free(svnn);
- return -1;
- }
-
- /* assume the single public ip interface is initially "good" */
- cur = ctdb_find_iface(ctdb, iface);
- if (cur == NULL) {
- DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
- return -1;
- }
- cur->link_up = true;
-
- ret = ctdb_vnn_assign_iface(ctdb, svnn);
- if (ret != 0) {
- talloc_free(svnn);
- return -1;
- }
-
- ctdb->single_ip_vnn = svnn;
- return 0;
-}
-
-struct public_ip_list {
- struct public_ip_list *next;
- uint32_t pnn;
- ctdb_sock_addr addr;
-};
-
-/* Given a physical node, return the number of
- public addresses that is currently assigned to this node.
-*/
-static int node_ip_coverage(struct ctdb_context *ctdb, int32_t pnn,
- struct public_ip_list *ips)
-{
- int num=0;
-
- for (;ips;ips=ips->next) {
- if (ips->pnn == pnn) {
- num++;
- }
- }
- return num;
-}
-
-
-/* Can the given node host the given IP: is the public IP known to the
- * node and is NOIPHOST unset?
-*/
-static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
- struct public_ip_list *ip)
-{
- struct ctdb_public_ip_list_old *public_ips;
- int i;
-
- if (ipflags.noiphost) {
- return false;
- }
-
- public_ips = ctdb->nodes[pnn]->available_public_ips;
-
- if (public_ips == NULL) {
- return false;
- }
-
- for (i=0; i<public_ips->num; i++) {
- if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
- /* yes, this node can serve this public ip */
- return true;
- }
- }
-
- return false;
-}
-
-static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
- struct ctdb_ipflags ipflags,
- struct public_ip_list *ip)
-{
- if (ipflags.noiptakeover) {
- return false;
- }
-
- return can_node_host_ip(ctdb, pnn, ipflags, ip);
-}
-
-/* search the node lists list for a node to takeover this ip.
- pick the node that currently are serving the least number of ips
- so that the ips get spread out evenly.
-*/
-static int find_takeover_node(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *ip,
- struct public_ip_list *all_ips)
-{
- int pnn, min=0, num;
- int i, numnodes;
-
- numnodes = talloc_array_length(ipflags);
- pnn = -1;
- for (i=0; i<numnodes; i++) {
- /* verify that this node can serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], ip)) {
- /* no it couldnt so skip to the next node */
- continue;
- }
-
- num = node_ip_coverage(ctdb, i, all_ips);
- /* was this the first node we checked ? */
- if (pnn == -1) {
- pnn = i;
- min = num;
- } else {
- if (num < min) {
- pnn = i;
- min = num;
- }
- }
- }
- if (pnn == -1) {
- DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
- ctdb_addr_to_str(&ip->addr)));
-
- return -1;
- }
-
- ip->pnn = pnn;
- return 0;
-}
-
-#define IP_KEYLEN 4
-static uint32_t *ip_key(ctdb_sock_addr *ip)
-{
- static uint32_t key[IP_KEYLEN];
-
- bzero(key, sizeof(key));
-
- switch (ip->sa.sa_family) {
- case AF_INET:
- key[3] = htonl(ip->ip.sin_addr.s_addr);
- break;
- case AF_INET6: {
- uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
- key[0] = htonl(s6_a32[0]);
- key[1] = htonl(s6_a32[1]);
- key[2] = htonl(s6_a32[2]);
- key[3] = htonl(s6_a32[3]);
- break;
- }
- default:
- DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
- return key;
- }
-
- return key;
-}
-
static void *add_ip_callback(void *parm, void *data)
{
struct public_ip_list *this_ip = parm;
return 0;
}
+static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+ struct ctdb_public_ip_list *ips,
+ uint32_t pnn);
+/*
+ * Query every node in nodemap that is not flagged NODE_FLAGS_INACTIVE
+ * for its public IP lists and record them in ipalloc_state.
+ *
+ * For each such node j two requests are made with
+ * ctdb_ctrl_get_public_ips_flags(): one with flags 0, stored in
+ * ipalloc_state->known_public_ips[j], and one with
+ * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, stored in
+ * ipalloc_state->available_public_ips[j].  Entries for inactive nodes
+ * are not written by this function.
+ *
+ * When ctdb->do_checkpublicip is set, verify_remote_ip_allocation() is
+ * called on the known list of each queried node.
+ *
+ * Returns 0 on success.  Returns -1 if ipalloc_state->num differs from
+ * nodemap->num or if any request fails; in the latter case entries
+ * already filled in for lower-numbered nodes are left in place.
+ *
+ * NOTE(review): j is declared int but is printed with %u in the two
+ * failure messages - confirm whether that is intended.
+ */
+static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap)
+{
+ int j; /* node index into nodemap->nodes[] and both ipalloc_state arrays */
+ int ret; /* result of the most recent ctdb_ctrl_get_public_ips_flags() call */
+ struct ctdb_public_ip_list_old *ip_list; /* reply of the most recent request */
+
+ if (ipalloc_state->num != nodemap->num) {
+ DEBUG(DEBUG_ERR,
+ (__location__
+ " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
+ ipalloc_state->num, nodemap->num));
+ return -1;
+ }
+
+ for (j=0; j<nodemap->num; j++) {
+ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
+ continue; /* inactive nodes are not queried */
+ }
+
+ /* Retrieve the list of known public IPs from the node
+ * (flags argument 0) */
+ ret = ctdb_ctrl_get_public_ips_flags(ctdb,
+ TAKEOVER_TIMEOUT(),
+ j,
+ ipalloc_state->known_public_ips,
+ 0,
+ &ip_list);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to read known public IPs from node: %u\n",
+ j));
+ return -1;
+ }
+ ipalloc_state->known_public_ips[j].num = ip_list->num;
+ /* This could be copied and freed. However, ip_list
+ * is allocated off ipalloc_state->known_public_ips,
+ * so this is a safe hack: the entry below points
+ * straight into the reply instead of owning a copy.
+ * This will go away in a while anyway... */
+ ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
+
+ if (ctdb->do_checkpublicip) {
+ verify_remote_ip_allocation(
+ ctdb,
+ &ipalloc_state->known_public_ips[j],
+ j); /* the return value is not checked here */
+ }
+
+ /* Retrieve the list of available public IPs from the node
+ * (same request, with CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) */
+ ret = ctdb_ctrl_get_public_ips_flags(ctdb,
+ TAKEOVER_TIMEOUT(),
+ j,
+ ipalloc_state->available_public_ips,
+ CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
+ &ip_list);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to read available public IPs from node: %u\n",
+ j));
+ return -1;
+ }
+ ipalloc_state->available_public_ips[j].num = ip_list->num;
+ /* This could be copied and freed. However, ip_list
+ * is allocated off ipalloc_state->available_public_ips,
+ * so this is a safe hack: the entry below points
+ * straight into the reply instead of owning a copy.
+ * This will go away in a while anyway... */
+ ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
+ }
+
+ return 0;
+}
+
static struct public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb)
+create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
{
int i, j;
struct public_ip_list *ip_list;
- struct ctdb_public_ip_list_old *public_ips;
+ struct ctdb_public_ip_list *public_ips;
- if (ctdb->ip_tree != NULL) {
- talloc_free(ctdb->ip_tree);
- ctdb->ip_tree = NULL;
- }
+ TALLOC_FREE(ctdb->ip_tree);
ctdb->ip_tree = trbt_create(ctdb, 0);
- for (i=0;i<ctdb->num_nodes;i++) {
- public_ips = ctdb->nodes[i]->known_public_ips;
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
+ return NULL;
+ }
- if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
- continue;
- }
+ for (i=0; i < ipalloc_state->num; i++) {
- /* there were no public ips for this node */
- if (public_ips == NULL) {
- continue;
- }
+ public_ips = &ipalloc_state->known_public_ips[i];
- for (j=0;j<public_ips->num;j++) {
+ for (j=0; j < public_ips->num; j++) {
struct public_ip_list *tmp_ip;
tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
/* Do not use information about IP addresses hosted
* on other nodes, it may not be accurate */
- if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
- tmp_ip->pnn = public_ips->ips[j].pnn;
+ if (public_ips->ip[j].pnn == i) {
+ tmp_ip->pnn = public_ips->ip[j].pnn;
} else {
tmp_ip->pnn = -1;
}
- tmp_ip->addr = public_ips->ips[j].addr;
+ tmp_ip->addr = public_ips->ip[j].addr;
tmp_ip->next = NULL;
trbt_insertarray32_callback(ctdb->ip_tree,
- IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
+ IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
add_ip_callback,
tmp_ip);
}
return ip_list;
}
-/*
- * This is the length of the longtest common prefix between the IPs.
- * It is calculated by XOR-ing the 2 IPs together and counting the
- * number of leading zeroes. The implementation means that all
- * addresses end up being 128 bits long.
- *
- * FIXME? Should we consider IPv4 and IPv6 separately given that the
- * 12 bytes of 0 prefix padding will hurt the algorithm if there are
- * lots of nodes and IP addresses?
- */
-static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
+static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap) /*
+ * Report whether every entry of nodemap->nodes[] has at least one of
+ * NODE_FLAGS_INACTIVE or NODE_FLAGS_DISABLED set.
+ *
+ * Returns false as soon as a node with neither flag is found, true
+ * otherwise.  A nodemap with num == 0 yields true because the loop
+ * body never runs.
+ */
{
- uint32_t ip1_k[IP_KEYLEN];
- uint32_t *t;
int i;
- uint32_t x;
- uint32_t distance = 0;
-
- memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
- t = ip_key(ip2);
- for (i=0; i<IP_KEYLEN; i++) {
- x = ip1_k[i] ^ t[i];
- if (x == 0) {
- distance += 32;
- } else {
- /* Count number of leading zeroes.
- * FIXME? This could be optimised...
- */
- while ((x & (1 << 31)) == 0) {
- x <<= 1;
- distance += 1;
- }
+ for (i=0;i<nodemap->num;i++) {
+ if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
+ /* Found one completely healthy node: neither the
+ * INACTIVE nor the DISABLED flag is set on it */
+ return false;
}
}
- return distance;
+ return true; /* no node without INACTIVE/DISABLED was found */
}
-/* Calculate the IP distance for the given IP relative to IPs on the
- given node. The ips argument is generally the all_ips variable
- used in the main part of the algorithm.
- */
-static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
- struct public_ip_list *ips,
- int pnn)
-{
- struct public_ip_list *t;
- uint32_t d;
-
- uint32_t sum = 0;
-
- for (t=ips; t != NULL; t=t->next) {
- if (t->pnn != pnn) {
- continue;
- }
+struct get_tunable_callback_data { /* state handed to get_tunable_callback() and get_tunable_fail_callback() through their void *callback argument */
+ const char *tunable; /* tunable name; only used in the callbacks' log messages */
+ uint32_t *out; /* per-node values indexed by pnn; length is read with talloc_array_length() */
+ bool fatal; /* set to true by the callbacks on a wrong-sized reply, a timeout or an unexpected error */
+};
- /* Optimisation: We never calculate the distance
- * between an address and itself. This allows us to
- * calculate the effect of removing an address from a
- * node by simply calculating the distance between
- * that address and all of the exitsing addresses.
- * Moreover, we assume that we're only ever dealing
- * with addresses from all_ips so we can identify an
- * address via a pointer rather than doing a more
- * expensive address comparison. */
- if (&(t->addr) == ip) {
- continue;
- }
+static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
+ int32_t res, TDB_DATA outdata,
+ void *callback) /*
+ * Reply handler for a tunable query sent to node pnn.  callback points
+ * to a struct get_tunable_callback_data.
+ *
+ * - res != 0: nothing is done here.
+ * - reply payload is not exactly sizeof(uint32_t) bytes: an error is
+ *   logged and cd->fatal is set.
+ * - pnn is not a valid index into cd->out: an error is logged and the
+ *   reply is dropped; cd->fatal is left unchanged.
+ * - otherwise the uint32_t payload is stored in cd->out[pnn].
+ *
+ * The ctdb argument is not used.
+ *
+ * NOTE(review): pnn is uint32_t while size is int, so "pnn >= size"
+ * compares mixed signedness, and pnn is printed with %d - confirm
+ * this is acceptable.
+ */
+{
+ struct get_tunable_callback_data *cd =
+ (struct get_tunable_callback_data *)callback;
+ int size; /* number of elements in cd->out */
- d = ip_distance(ip, &(t->addr));
- sum += d * d; /* Cheaper than pulling in math.h :-) */
+ if (res != 0) {
+ /* Already handled in fail callback, so a failed reply
+ * is simply ignored by this handler */
+ return;
}
- return sum;
-}
-
-/* Return the LCP2 imbalance metric for addresses currently assigned
- to the given node.
- */
-static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn)
-{
- struct public_ip_list *t;
-
- uint32_t imbalance = 0;
+ if (outdata.dsize != sizeof(uint32_t)) {
+ DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
+ cd->tunable, pnn, (int)sizeof(uint32_t),
+ (int)outdata.dsize));
+ cd->fatal = true;
+ return;
+ }
- for (t=all_ips; t!=NULL; t=t->next) {
- if (t->pnn != pnn) {
- continue;
- }
- /* Pass the rest of the IPs rather than the whole
- all_ips input list.
- */
- imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
+ size = talloc_array_length(cd->out);
+ if (pnn >= size) {
+ DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
+ cd->tunable, pnn, size));
+ return; /* logged only; cd->fatal is not set on this path */
}
- return imbalance;
+
+ cd->out[pnn] = *(uint32_t *)outdata.dptr; /* payload size was checked above */
}
-/* Allocate any unassigned IPs just by looping through the IPs and
- * finding the best node for each.
- */
-static void basic_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
+static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
+ int32_t res, TDB_DATA outdata,
+ void *callback) /*
+ * Failure handler for a tunable query sent to node pnn.  callback
+ * points to a struct get_tunable_callback_data.
+ *
+ * The error code res decides the outcome:
+ * - -ETIME: logged at DEBUG_ERR, cd->fatal is set.
+ * - -EINVAL or -1: logged at DEBUG_WARNING as "not implemented";
+ *   cd->fatal is left unchanged.
+ * - anything else: logged at DEBUG_ERR, cd->fatal is set.
+ *
+ * The ctdb and outdata arguments are not used.
+ */
{
- struct public_ip_list *tmp_ip;
+ struct get_tunable_callback_data *cd =
+ (struct get_tunable_callback_data *)callback;
- /* loop over all ip's and find a physical node to cover for
- each unassigned ip.
- */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- if (find_takeover_node(ctdb, ipflags, tmp_ip, all_ips)) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
- }
- }
+ switch (res) {
+ case -ETIME: /* timeout: marked fatal */
+ DEBUG(DEBUG_ERR,
+ ("Timed out getting tunable \"%s\" from node %d\n",
+ cd->tunable, pnn));
+ cd->fatal = true;
+ break;
+ case -EINVAL:
+ case -1: /* both codes are reported as "not implemented"; fatal is not set */
+ DEBUG(DEBUG_WARNING,
+ ("Tunable \"%s\" not implemented on node %d\n",
+ cd->tunable, pnn));
+ break;
+ default: /* any other error code: marked fatal */
+ DEBUG(DEBUG_ERR,
+ ("Unexpected error getting tunable \"%s\" from node %d\n",
+ cd->tunable, pnn));
+ cd->fatal = true;
}
}
-/* Basic non-deterministic rebalancing algorithm.
- */
-static void basic_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- int num_ips)
+static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
+ TALLOC_CTX *tmp_ctx,
+ struct ctdb_node_map_old *nodemap,
+ const char *tunable,
+ uint32_t default_value)
{
- int i, numnodes;
- int maxnode, maxnum, minnode, minnum, num, retries;
- struct public_ip_list *tmp_ip;
-
- numnodes = talloc_array_length(ipflags);
- retries = 0;
-
-try_again:
- maxnum=0;
- minnum=0;
+ TDB_DATA data;
+ struct ctdb_control_get_tunable *t;
+ uint32_t *nodes;
+ uint32_t *tvals;
+ struct get_tunable_callback_data callback_data;
+ int i;
- /* for each ip address, loop over all nodes that can serve
- this ip and make sure that the difference between the node
- serving the most and the node serving the least ip's are
- not greater than 1.
- */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- continue;
- }
-
- /* Get the highest and lowest number of ips's served by any
- valid node which can serve this ip.
- */
- maxnode = -1;
- minnode = -1;
- for (i=0; i<numnodes; i++) {
- /* only check nodes that can actually serve this ip */
- if (!can_node_takeover_ip(ctdb, i, ipflags[i], tmp_ip)) {
- /* no it couldnt so skip to the next node */
- continue;
- }
-
- num = node_ip_coverage(ctdb, i, all_ips);
- if (maxnode == -1) {
- maxnode = i;
- maxnum = num;
- } else {
- if (num > maxnum) {
- maxnode = i;
- maxnum = num;
- }
- }
- if (minnode == -1) {
- minnode = i;
- minnum = num;
- } else {
- if (num < minnum) {
- minnode = i;
- minnum = num;
- }
- }
- }
- if (maxnode == -1) {
- DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
-
- continue;
- }
-
- /* if the spread between the smallest and largest coverage by
- a node is >=2 we steal one of the ips from the node with
- most coverage to even things out a bit.
- try to do this a limited number of times since we dont
- want to spend too much time balancing the ip coverage.
- */
- if ( (maxnum > minnum+1)
- && (retries < (num_ips + 5)) ){
- struct public_ip_list *tmp;
-
- /* Reassign one of maxnode's VNNs */
- for (tmp=all_ips;tmp;tmp=tmp->next) {
- if (tmp->pnn == maxnode) {
- (void)find_takeover_node(ctdb, ipflags, tmp, all_ips);
- retries++;
- goto try_again;;
- }
- }
- }
- }
-}
-
-static void lcp2_init(struct ctdb_context *tmp_ctx,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *force_rebalance_nodes,
- uint32_t **lcp2_imbalances,
- bool **rebalance_candidates)
-{
- int i, numnodes;
- struct public_ip_list *tmp_ip;
-
- numnodes = talloc_array_length(ipflags);
-
- *rebalance_candidates = talloc_array(tmp_ctx, bool, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *rebalance_candidates);
- *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, numnodes);
- CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
-
- for (i=0; i<numnodes; i++) {
- (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
- /* First step: assume all nodes are candidates */
- (*rebalance_candidates)[i] = true;
- }
-
- /* 2nd step: if a node has IPs assigned then it must have been
- * healthy before, so we remove it from consideration. This
- * is overkill but is all we have because we don't maintain
- * state between takeover runs. An alternative would be to
- * keep state and invalidate it every time the recovery master
- * changes.
- */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
- (*rebalance_candidates)[tmp_ip->pnn] = false;
- }
- }
-
- /* 3rd step: if a node is forced to re-balance then
- we allow failback onto the node */
- if (force_rebalance_nodes == NULL) {
- return;
- }
- for (i = 0; i < talloc_array_length(force_rebalance_nodes); i++) {
- uint32_t pnn = force_rebalance_nodes[i];
- if (pnn >= numnodes) {
- DEBUG(DEBUG_ERR,
- (__location__ "unknown node %u\n", pnn));
- continue;
- }
-
- DEBUG(DEBUG_NOTICE,
- ("Forcing rebalancing of IPs to node %u\n", pnn));
- (*rebalance_candidates)[pnn] = true;
- }
-}
-
-/* Allocate any unassigned addresses using the LCP2 algorithm to find
- * the IP/node combination that will cost the least.
- */
-static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *lcp2_imbalances)
-{
- struct public_ip_list *tmp_ip;
- int dstnode, numnodes;
-
- int minnode;
- uint32_t mindsum, dstdsum, dstimbl, minimbl;
- struct public_ip_list *minip;
-
- bool should_loop = true;
- bool have_unassigned = true;
-
- numnodes = talloc_array_length(ipflags);
-
- while (have_unassigned && should_loop) {
- should_loop = false;
-
- DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
-
- minnode = -1;
- mindsum = 0;
- minip = NULL;
-
- /* loop over each unassigned ip. */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn != -1) {
- continue;
- }
-
- for (dstnode=0; dstnode<numnodes; dstnode++) {
- /* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode],
- tmp_ip)) {
- /* no it couldnt so skip to the next node */
- continue;
- }
-
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
- dstimbl = lcp2_imbalances[dstnode] + dstdsum;
- DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode,
- dstimbl - lcp2_imbalances[dstnode]));
-
-
- if ((minnode == -1) || (dstdsum < mindsum)) {
- minnode = dstnode;
- minimbl = dstimbl;
- mindsum = dstdsum;
- minip = tmp_ip;
- should_loop = true;
- }
- }
- }
-
- DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
-
- /* If we found one then assign it to the given node. */
- if (minnode != -1) {
- minip->pnn = minnode;
- lcp2_imbalances[minnode] = minimbl;
- DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
- ctdb_addr_to_str(&(minip->addr)),
- minnode,
- mindsum));
- }
-
- /* There might be a better way but at least this is clear. */
- have_unassigned = false;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- have_unassigned = true;
- }
- }
- }
-
- /* We know if we have an unassigned addresses so we might as
- * well optimise.
- */
- if (have_unassigned) {
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
- ctdb_addr_to_str(&tmp_ip->addr)));
- }
- }
- }
-}
-
-/* LCP2 algorithm for rebalancing the cluster. Given a candidate node
- * to move IPs from, determines the best IP/destination node
- * combination to move from the source node.
- */
-static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- int srcnode,
- uint32_t *lcp2_imbalances,
- bool *rebalance_candidates)
-{
- int dstnode, mindstnode, numnodes;
- uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
- uint32_t minsrcimbl, mindstimbl;
- struct public_ip_list *minip;
- struct public_ip_list *tmp_ip;
-
- /* Find an IP and destination node that best reduces imbalance. */
- srcimbl = 0;
- minip = NULL;
- minsrcimbl = 0;
- mindstnode = -1;
- mindstimbl = 0;
-
- numnodes = talloc_array_length(ipflags);
-
- DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
- DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n",
- srcnode, lcp2_imbalances[srcnode]));
-
- for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
- /* Only consider addresses on srcnode. */
- if (tmp_ip->pnn != srcnode) {
- continue;
- }
-
- /* What is this IP address costing the source node? */
- srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
- srcimbl = lcp2_imbalances[srcnode] - srcdsum;
-
- /* Consider this IP address would cost each potential
- * destination node. Destination nodes are limited to
- * those that are newly healthy, since we don't want
- * to do gratuitous failover of IPs just to make minor
- * balance improvements.
- */
- for (dstnode=0; dstnode<numnodes; dstnode++) {
- if (!rebalance_candidates[dstnode]) {
- continue;
- }
-
- /* only check nodes that can actually takeover this ip */
- if (!can_node_takeover_ip(ctdb, dstnode,
- ipflags[dstnode], tmp_ip)) {
- /* no it couldnt so skip to the next node */
- continue;
- }
-
- dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
- dstimbl = lcp2_imbalances[dstnode] + dstdsum;
- DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
- srcnode, -srcdsum,
- ctdb_addr_to_str(&(tmp_ip->addr)),
- dstnode, dstdsum));
-
- if ((dstimbl < lcp2_imbalances[srcnode]) &&
- (dstdsum < srcdsum) && \
- ((mindstnode == -1) || \
- ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
-
- minip = tmp_ip;
- minsrcimbl = srcimbl;
- mindstnode = dstnode;
- mindstimbl = dstimbl;
- }
- }
- }
- DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
-
- if (mindstnode != -1) {
- /* We found a move that makes things better... */
- DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
- srcnode, minsrcimbl - lcp2_imbalances[srcnode],
- ctdb_addr_to_str(&(minip->addr)),
- mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
-
-
- lcp2_imbalances[srcnode] = minsrcimbl;
- lcp2_imbalances[mindstnode] = mindstimbl;
- minip->pnn = mindstnode;
-
- return true;
- }
-
- return false;
-
-}
-
-struct lcp2_imbalance_pnn {
- uint32_t imbalance;
- int pnn;
-};
-
-static int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
-{
- const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
- const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
-
- if (lipa->imbalance > lipb->imbalance) {
- return -1;
- } else if (lipa->imbalance == lipb->imbalance) {
- return 0;
- } else {
- return 1;
- }
-}
-
-/* LCP2 algorithm for rebalancing the cluster. This finds the source
- * node with the highest LCP2 imbalance, and then determines the best
- * IP/destination node combination to move from the source node.
- */
-static void lcp2_failback(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *lcp2_imbalances,
- bool *rebalance_candidates)
-{
- int i, numnodes;
- struct lcp2_imbalance_pnn * lips;
- bool again;
-
- numnodes = talloc_array_length(ipflags);
-
-try_again:
- /* Put the imbalances and nodes into an array, sort them and
- * iterate through candidates. Usually the 1st one will be
- * used, so this doesn't cost much...
- */
- DEBUG(DEBUG_DEBUG,("+++++++++++++++++++++++++++++++++++++++++\n"));
- DEBUG(DEBUG_DEBUG,("Selecting most imbalanced node from:\n"));
- lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, numnodes);
- for (i=0; i<numnodes; i++) {
- lips[i].imbalance = lcp2_imbalances[i];
- lips[i].pnn = i;
- DEBUG(DEBUG_DEBUG,(" %d [%d]\n", i, lcp2_imbalances[i]));
- }
- qsort(lips, numnodes, sizeof(struct lcp2_imbalance_pnn),
- lcp2_cmp_imbalance_pnn);
-
- again = false;
- for (i=0; i<numnodes; i++) {
- /* This means that all nodes had 0 or 1 addresses, so
- * can't be imbalanced.
- */
- if (lips[i].imbalance == 0) {
- break;
- }
-
- if (lcp2_failback_candidate(ctdb,
- ipflags,
- all_ips,
- lips[i].pnn,
- lcp2_imbalances,
- rebalance_candidates)) {
- again = true;
- break;
- }
- }
-
- talloc_free(lips);
- if (again) {
- goto try_again;
- }
-}
-
-static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
-{
- struct public_ip_list *tmp_ip;
-
- /* verify that the assigned nodes can serve that public ip
- and set it to -1 if not
- */
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- if (tmp_ip->pnn == -1) {
- continue;
- }
- if (!can_node_host_ip(ctdb, tmp_ip->pnn,
- ipflags[tmp_ip->pnn], tmp_ip) != 0) {
- /* this node can not serve this ip. */
- DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
- ctdb_addr_to_str(&(tmp_ip->addr)),
- tmp_ip->pnn));
- tmp_ip->pnn = -1;
- }
- }
-}
-
-static void ip_alloc_deterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
-{
- struct public_ip_list *tmp_ip;
- int i, numnodes;
-
- numnodes = talloc_array_length(ipflags);
-
- DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
- /* Allocate IPs to nodes in a modulo fashion so that IPs will
- * always be allocated the same way for a specific set of
- * available/unavailable nodes.
- */
-
- for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
- tmp_ip->pnn = i % numnodes;
- }
-
- /* IP failback doesn't make sense with deterministic
- * IPs, since the modulo step above implicitly fails
- * back IPs to their "home" node.
- */
- if (1 == ctdb->tunable.no_ip_failback) {
- DEBUG(DEBUG_WARNING, ("WARNING: 'NoIPFailback' set but ignored - incompatible with 'DeterministicIPs\n"));
- }
-
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
-
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
-
- /* No failback here! */
-}
-
-static void ip_alloc_nondeterministic_ips(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips)
-{
- /* This should be pushed down into basic_failback. */
- struct public_ip_list *tmp_ip;
- int num_ips = 0;
- for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
- num_ips++;
- }
-
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
-
- basic_allocate_unassigned(ctdb, ipflags, all_ips);
-
- /* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
- return;
- }
-
- /* Now, try to make sure the ip adresses are evenly distributed
- across the nodes.
- */
- basic_failback(ctdb, ipflags, all_ips, num_ips);
-}
-
-static void ip_alloc_lcp2(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list *all_ips,
- uint32_t *force_rebalance_nodes)
-{
- uint32_t *lcp2_imbalances;
- bool *rebalance_candidates;
- int numnodes, num_rebalance_candidates, i;
-
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-
- unassign_unsuitable_ips(ctdb, ipflags, all_ips);
-
- lcp2_init(tmp_ctx, ipflags, all_ips,force_rebalance_nodes,
- &lcp2_imbalances, &rebalance_candidates);
-
- lcp2_allocate_unassigned(ctdb, ipflags, all_ips, lcp2_imbalances);
-
- /* If we don't want IPs to fail back then don't rebalance IPs. */
- if (1 == ctdb->tunable.no_ip_failback) {
- goto finished;
- }
-
- /* It is only worth continuing if we have suitable target
- * nodes to transfer IPs to. This check is much cheaper than
- * continuing on...
- */
- numnodes = talloc_array_length(ipflags);
- num_rebalance_candidates = 0;
- for (i=0; i<numnodes; i++) {
- if (rebalance_candidates[i]) {
- num_rebalance_candidates++;
- }
- }
- if (num_rebalance_candidates == 0) {
- goto finished;
- }
-
- /* Now, try to make sure the ip adresses are evenly distributed
- across the nodes.
- */
- lcp2_failback(ctdb, ipflags, all_ips,
- lcp2_imbalances, rebalance_candidates);
-
-finished:
- talloc_free(tmp_ctx);
-}
-
-static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
-{
- int i;
-
- for (i=0;i<nodemap->num;i++) {
- if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
- /* Found one completely healthy node */
- return false;
- }
- }
-
- return true;
-}
-
-/* The calculation part of the IP allocation algorithm. */
-static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
- struct ctdb_ipflags *ipflags,
- struct public_ip_list **all_ips_p,
- uint32_t *force_rebalance_nodes)
-{
- /* since nodes only know about those public addresses that
- can be served by that particular node, no single node has
- a full list of all public addresses that exist in the cluster.
- Walk over all node structures and create a merged list of
- all public addresses that exist in the cluster.
-
- keep the tree of ips around as ctdb->ip_tree
- */
- *all_ips_p = create_merged_ip_list(ctdb);
-
- if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
- ip_alloc_lcp2(ctdb, ipflags, *all_ips_p, force_rebalance_nodes);
- } else if (1 == ctdb->tunable.deterministic_public_ips) {
- ip_alloc_deterministic_ips(ctdb, ipflags, *all_ips_p);
- } else {
- ip_alloc_nondeterministic_ips(ctdb, ipflags, *all_ips_p);
- }
-
- /* at this point ->pnn is the node which will own each IP
- or -1 if there is no node that can cover this ip
- */
-
- return;
-}
-
-struct get_tunable_callback_data {
- const char *tunable;
- uint32_t *out;
- bool fatal;
-};
-
-static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
-{
- struct get_tunable_callback_data *cd =
- (struct get_tunable_callback_data *)callback;
- int size;
-
- if (res != 0) {
- /* Already handled in fail callback */
- return;
- }
-
- if (outdata.dsize != sizeof(uint32_t)) {
- DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
- cd->tunable, pnn, (int)sizeof(uint32_t),
- (int)outdata.dsize));
- cd->fatal = true;
- return;
- }
-
- size = talloc_array_length(cd->out);
- if (pnn >= size) {
- DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
- cd->tunable, pnn, size));
- return;
- }
-
-
- cd->out[pnn] = *(uint32_t *)outdata.dptr;
-}
-
-static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
-{
- struct get_tunable_callback_data *cd =
- (struct get_tunable_callback_data *)callback;
-
- switch (res) {
- case -ETIME:
- DEBUG(DEBUG_ERR,
- ("Timed out getting tunable \"%s\" from node %d\n",
- cd->tunable, pnn));
- cd->fatal = true;
- break;
- case -EINVAL:
- case -1:
- DEBUG(DEBUG_WARNING,
- ("Tunable \"%s\" not implemented on node %d\n",
- cd->tunable, pnn));
- break;
- default:
- DEBUG(DEBUG_ERR,
- ("Unexpected error getting tunable \"%s\" from node %d\n",
- cd->tunable, pnn));
- cd->fatal = true;
- }
-}
-
-static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap,
- const char *tunable,
- uint32_t default_value)
-{
- TDB_DATA data;
- struct ctdb_control_get_tunable *t;
- uint32_t *nodes;
- uint32_t *tvals;
- struct get_tunable_callback_data callback_data;
- int i;
-
- tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, tvals);
- for (i=0; i<nodemap->num; i++) {
- tvals[i] = default_value;
- }
-
- callback_data.out = tvals;
- callback_data.tunable = tunable;
- callback_data.fatal = false;
+ tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
+ CTDB_NO_MEMORY_NULL(ctdb, tvals);
+ for (i=0; i<nodemap->num; i++) {
+ tvals[i] = default_value;
+ }
+
+ callback_data.out = tvals;
+ callback_data.tunable = tunable;
+ callback_data.fatal = false;
data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
data.dptr = talloc_size(tmp_ctx, data.dsize);
return tvals;
}
-struct get_runstate_callback_data {
- enum ctdb_runstate *out;
- bool fatal;
-};
-
-static void get_runstate_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback_data)
-{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback_data;
- int size;
-
- if (res != 0) {
- /* Already handled in fail callback */
- return;
- }
-
- if (outdata.dsize != sizeof(uint32_t)) {
- DEBUG(DEBUG_ERR,("Wrong size of returned data when getting runstate from node %d. Expected %d bytes but received %d bytes\n",
- pnn, (int)sizeof(uint32_t),
- (int)outdata.dsize));
- cd->fatal = true;
- return;
- }
-
- size = talloc_array_length(cd->out);
- if (pnn >= size) {
- DEBUG(DEBUG_ERR,("Got reply from node %d but nodemap only has %d entries\n",
- pnn, size));
- return;
- }
-
- cd->out[pnn] = (enum ctdb_runstate)*(uint32_t *)outdata.dptr;
-}
-
-static void get_runstate_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
-{
- struct get_runstate_callback_data *cd =
- (struct get_runstate_callback_data *)callback;
-
- switch (res) {
- case -ETIME:
- DEBUG(DEBUG_ERR,
- ("Timed out getting runstate from node %d\n", pnn));
- cd->fatal = true;
- break;
- default:
- DEBUG(DEBUG_WARNING,
- ("Error getting runstate from node %d - assuming runstates not supported\n",
- pnn));
- }
-}
-
-static enum ctdb_runstate * get_runstate_from_nodes(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap,
- enum ctdb_runstate default_value)
-{
- uint32_t *nodes;
- enum ctdb_runstate *rs;
- struct get_runstate_callback_data callback_data;
- int i;
-
- rs = talloc_array(tmp_ctx, enum ctdb_runstate, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, rs);
- for (i=0; i<nodemap->num; i++) {
- rs[i] = default_value;
- }
-
- callback_data.out = rs;
- callback_data.fatal = false;
-
- nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
- if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_RUNSTATE,
- nodes, 0, TAKEOVER_TIMEOUT(),
- true, tdb_null,
- get_runstate_callback,
- get_runstate_fail_callback,
- &callback_data) != 0) {
- if (callback_data.fatal) {
- free(rs);
- rs = NULL;
- }
- }
- talloc_free(nodes);
-
- return rs;
-}
-
/* Set internal flags for IP allocation:
* Clear ip flags
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
* else
* Set NOIPHOST ip flags for disabled nodes
*/
-static struct ctdb_ipflags *
-set_ipflags_internal(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap,
- uint32_t *tval_noiptakeover,
- uint32_t *tval_noiphostonalldisabled,
- enum ctdb_runstate *runstate)
+static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap,
+ uint32_t *tval_noiptakeover,
+ uint32_t *tval_noiphostonalldisabled)
{
int i;
- struct ctdb_ipflags *ipflags;
-
- /* Clear IP flags - implicit due to talloc_zero */
- ipflags = talloc_zero_array(tmp_ctx, struct ctdb_ipflags, nodemap->num);
- CTDB_NO_MEMORY_NULL(ctdb, ipflags);
for (i=0;i<nodemap->num;i++) {
/* Can not take IPs on node with NoIPTakeover set */
if (tval_noiptakeover[i] != 0) {
- ipflags[i].noiptakeover = true;
+ ipalloc_state->noiptakeover[i] = true;
}
- /* Can not host IPs on node not in RUNNING state */
- if (runstate[i] != CTDB_RUNSTATE_RUNNING) {
- ipflags[i].noiphost = true;
- continue;
- }
/* Can not host IPs on INACTIVE node */
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
- /* Remember the runstate */
- ipflags[i].runstate = runstate[i];
}
if (all_nodes_are_disabled(nodemap)) {
*/
for (i=0;i<nodemap->num;i++) {
if (tval_noiphostonalldisabled[i] != 0) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
}
} else {
*/
for (i=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
- ipflags[i].noiphost = true;
+ ipalloc_state->noiphost[i] = true;
}
}
}
-
- return ipflags;
}
-static struct ctdb_ipflags *set_ipflags(struct ctdb_context *ctdb,
- TALLOC_CTX *tmp_ctx,
- struct ctdb_node_map_old *nodemap)
+static bool set_ipflags(struct ctdb_context *ctdb,
+ struct ipalloc_state *ipalloc_state,
+ struct ctdb_node_map_old *nodemap)
{
uint32_t *tval_noiptakeover;
uint32_t *tval_noiphostonalldisabled;
- struct ctdb_ipflags *ipflags;
- enum ctdb_runstate *runstate;
-
- tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
+ tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
"NoIPTakeover", 0);
if (tval_noiptakeover == NULL) {
- return NULL;
+ return false;
}
tval_noiphostonalldisabled =
- get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
+ get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
"NoIPHostOnAllDisabled", 0);
if (tval_noiphostonalldisabled == NULL) {
/* Caller frees tmp_ctx */
- return NULL;
+ return false;
}
- /* Any nodes where CTDB_CONTROL_GET_RUNSTATE is not supported
- * will default to CTDB_RUNSTATE_RUNNING. This ensures
- * reasonable behaviour on a mixed cluster during upgrade.
- */
- runstate = get_runstate_from_nodes(ctdb, tmp_ctx, nodemap,
- CTDB_RUNSTATE_RUNNING);
- if (runstate == NULL) {
- /* Caller frees tmp_ctx */
+ set_ipflags_internal(ipalloc_state, nodemap,
+ tval_noiptakeover,
+ tval_noiphostonalldisabled);
+
+ talloc_free(tval_noiptakeover);
+ talloc_free(tval_noiphostonalldisabled);
+
+ return true;
+}
+
+static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
+ TALLOC_CTX *mem_ctx)
+{
+ struct ipalloc_state *ipalloc_state =
+ talloc_zero(mem_ctx, struct ipalloc_state);
+ if (ipalloc_state == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
return NULL;
}
- ipflags = set_ipflags_internal(ctdb, tmp_ctx, nodemap,
- tval_noiptakeover,
- tval_noiphostonalldisabled,
- runstate);
+ ipalloc_state->num = ctdb->num_nodes;
+
+ ipalloc_state->known_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list,
+ ipalloc_state->num);
+ if (ipalloc_state->known_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ goto fail;
+ }
+
+ ipalloc_state->available_public_ips =
+ talloc_zero_array(ipalloc_state,
+ struct ctdb_public_ip_list,
+ ipalloc_state->num);
+ if (ipalloc_state->available_public_ips == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ goto fail;
+ }
+ ipalloc_state->noiptakeover =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num);
+ if (ipalloc_state->noiptakeover == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ goto fail;
+ }
+ ipalloc_state->noiphost =
+ talloc_zero_array(ipalloc_state,
+ bool,
+ ipalloc_state->num);
+ if (ipalloc_state->noiphost == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
+ goto fail;
+ }
+
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
+ ipalloc_state->algorithm = IPALLOC_LCP2;
+ } else if (1 == ctdb->tunable.deterministic_public_ips) {
+ ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
+ } else {
+ ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
+ }
- talloc_free(tval_noiptakeover);
- talloc_free(tval_noiphostonalldisabled);
- talloc_free(runstate);
+ ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
- return ipflags;
+ return ipalloc_state;
+fail:
+ talloc_free(ipalloc_state);
+ return NULL;
}
-struct iprealloc_callback_data {
- bool *retry_nodes;
- int retry_count;
- client_async_callback fail_callback;
- void *fail_callback_data;
- struct ctdb_node_map_old *nodemap;
+struct takeover_callback_data {
+ uint32_t num_nodes;
+ unsigned int *fail_count;
};
-static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
- int32_t res, TDB_DATA outdata,
- void *callback)
+static struct takeover_callback_data *
+takeover_callback_data_init(TALLOC_CTX *mem_ctx,
+ uint32_t num_nodes)
{
- int numnodes;
- struct iprealloc_callback_data *cd =
- (struct iprealloc_callback_data *)callback;
+ static struct takeover_callback_data *takeover_data;
- numnodes = talloc_array_length(cd->retry_nodes);
- if (pnn > numnodes) {
- DEBUG(DEBUG_ERR,
- ("ipreallocated failure from node %d, "
- "but only %d nodes in nodemap\n",
- pnn, numnodes));
- return;
+ takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
+ if (takeover_data == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ return NULL;
}
- /* Can't run the "ipreallocated" event on a INACTIVE node */
- if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
- DEBUG(DEBUG_WARNING,
- ("ipreallocated failed on inactive node %d, ignoring\n",
- pnn));
- return;
+ takeover_data->fail_count = talloc_zero_array(takeover_data,
+ unsigned int, num_nodes);
+ if (takeover_data->fail_count == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
+ talloc_free(takeover_data);
+ return NULL;
}
- switch (res) {
- case -ETIME:
- /* If the control timed out then that's a real error,
- * so call the real fail callback
- */
- if (cd->fail_callback) {
- cd->fail_callback(ctdb, pnn, res, outdata,
- cd->fail_callback_data);
- } else {
- DEBUG(DEBUG_WARNING,
- ("iprealloc timed out but no callback registered\n"));
- }
- break;
- default:
- /* If not a timeout then either the ipreallocated
- * eventscript (or some setup) failed. This might
- * have failed because the IPREALLOCATED control isn't
- * implemented - right now there is no way of knowing
- * because the error codes are all folded down to -1.
- * Consider retrying using EVENTSCRIPT control...
- */
- DEBUG(DEBUG_WARNING,
- ("ipreallocated failure from node %d, flagging retry\n",
- pnn));
- cd->retry_nodes[pnn] = true;
- cd->retry_count++;
- }
-}
+ takeover_data->num_nodes = num_nodes;
-struct takeover_callback_data {
- bool *node_failed;
- client_async_callback fail_callback;
- void *fail_callback_data;
- struct ctdb_node_map_old *nodemap;
-};
+ return takeover_data;
+}
static void takeover_run_fail_callback(struct ctdb_context *ctdb,
uint32_t node_pnn, int32_t res,
struct takeover_callback_data *cd =
talloc_get_type_abort(callback_data,
struct takeover_callback_data);
- int i;
- for (i = 0; i < cd->nodemap->num; i++) {
- if (node_pnn == cd->nodemap->nodes[i].pnn) {
- break;
+ if (node_pnn >= cd->num_nodes) {
+ DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
+ return;
+ }
+
+ if (cd->fail_count[node_pnn] == 0) {
+ DEBUG(DEBUG_ERR,
+ ("Node %u failed the takeover run\n", node_pnn));
+ }
+
+ cd->fail_count[node_pnn]++;
+}
+
+static void takeover_run_process_failures(struct ctdb_context *ctdb,
+ struct takeover_callback_data *tcd)
+{
+ unsigned int max_fails = 0;
+ uint32_t max_pnn = -1;
+ uint32_t i;
+
+ for (i = 0; i < tcd->num_nodes; i++) {
+ if (tcd->fail_count[i] > max_fails) {
+ max_pnn = i;
+ max_fails = tcd->fail_count[i];
}
}
- if (i == cd->nodemap->num) {
- DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
- return;
- }
+ if (max_fails > 0) {
+ int ret;
+ TDB_DATA data;
- if (!cd->node_failed[i]) {
- cd->node_failed[i] = true;
- cd->fail_callback(ctdb, node_pnn, res, outdata,
- cd->fail_callback_data);
+ DEBUG(DEBUG_ERR,
+ ("Sending banning credits to %u with fail count %u\n",
+ max_pnn, max_fails));
+
+ data.dptr = (uint8_t *)&max_pnn;
+ data.dsize = sizeof(uint32_t);
+ ret = ctdb_client_send_message(ctdb,
+ CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_BANNING,
+ data);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR,
+ ("Failed to set banning credits for node %u\n",
+ max_pnn));
+ }
}
}
/*
- make any IP alias changes for public addresses that are necessary
+ * Recalculate the allocation of public IPs to nodes and have the
+ * nodes host their allocated addresses.
+ *
+ * - Allocate memory for IP allocation state, including per node
+ * arrays
+ * - Populate IP allocation algorithm in IP allocation state
+ * - Populate local value of tunable NoIPFailback in IP allocation
+ *   state - this is really a cluster-wide configuration variable and
+ *   only the value from the master node is used
+ * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
+ * connected nodes - this is done separately so tunable values can
+ * be faked in unit testing
+ * - Populate NoIPTakeover tunable in IP allocation state
+ * - Populate NoIPHost in IP allocation state, derived from node flags
+ * and NoIPHostOnAllDisabled tunable
+ * - Retrieve and populate known and available IP lists in IP
+ * allocation state
+ * - If no available IP addresses then early exit
+ * - Build list of (known IPs, currently assigned node)
+ * - Populate list of nodes to force rebalance - internal structure,
+ * currently no way to fetch, only used by LCP2 for nodes that have
+ * had new IP addresses added
+ * - Run IP allocation algorithm
+ * - Send RELEASE_IP to all nodes for IPs they should not host
+ * - Send TAKE_IP to all nodes for IPs they should host
+ * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
- uint32_t *force_rebalance_nodes,
- client_async_callback fail_callback, void *callback_data)
+ uint32_t *force_rebalance_nodes)
{
- int i, j, ret;
+ int i, ret;
struct ctdb_public_ip ip;
uint32_t *nodes;
struct public_ip_list *all_ips, *tmp_ip;
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ctdb_ipflags *ipflags;
+ struct ipalloc_state *ipalloc_state;
struct takeover_callback_data *takeover_data;
- struct iprealloc_callback_data iprealloc_data;
- bool *retry_data;
bool can_host_ips;
+ /* Initialise fail callback data to be used with
+ * takeover_run_fail_callback(). A failure in any of the
+ * following steps will cause an early return, so this can be
+ * reused for each of those steps without re-initialising. */
+ takeover_data = takeover_callback_data_init(tmp_ctx,
+ nodemap->num);
+ if (takeover_data == NULL) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
/*
* ip failover is completely disabled, just send out the
* ipreallocated event.
goto ipreallocated;
}
- ipflags = set_ipflags(ctdb, tmp_ctx, nodemap);
- if (ipflags == NULL) {
+ ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
+ if (ipalloc_state == NULL) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
talloc_free(tmp_ctx);
return -1;
}
- /* Short-circuit IP allocation if no nodes are in the RUNNING
- * runstate yet, since no nodes will be able to host IPs */
+ /* Fetch known/available public IPs from each active node */
+ ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
+ if (ret != 0) {
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ /* Short-circuit IP allocation if no node has available IPs */
can_host_ips = false;
- for (i=0; i<nodemap->num; i++) {
- if (ipflags[i].runstate == CTDB_RUNSTATE_RUNNING) {
+ for (i=0; i < ipalloc_state->num; i++) {
+ if (ipalloc_state->available_public_ips[i].num != 0) {
can_host_ips = true;
}
}
if (!can_host_ips) {
DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
- return 0;
+ goto ipreallocated;
}
+ /* since nodes only know about those public addresses that
+ can be served by that particular node, no single node has
+ a full list of all public addresses that exist in the cluster.
+ Walk over all node structures and create a merged list of
+ all public addresses that exist in the cluster.
+
+ keep the tree of ips around as ctdb->ip_tree
+ */
+ all_ips = create_merged_ip_list(ctdb, ipalloc_state);
+ ipalloc_state->all_ips = all_ips;
+
+ ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
+
/* Do the IP reassignment calculations */
- ctdb_takeover_run_core(ctdb, ipflags, &all_ips, force_rebalance_nodes);
+ ipalloc(ipalloc_state);
/* Now tell all nodes to release any public IPs should not
* host. This will be a NOOP on nodes that don't currently
* hold the given IP.
*/
- takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
- CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
-
- takeover_data->node_failed = talloc_zero_array(tmp_ctx,
- bool, nodemap->num);
- CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
- takeover_data->fail_callback = fail_callback;
- takeover_data->fail_callback_data = callback_data;
- takeover_data->nodemap = nodemap;
-
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
/* This node should be serving this
- vnn so dont tell it to release the ip
+ vnn so don't tell it to release the ip
*/
continue;
}
}
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
- talloc_free(tmp_ctx);
- return -1;
+ DEBUG(DEBUG_ERR,
+ ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
+ goto fail;
}
talloc_free(async_data);
async_data = talloc_zero(tmp_ctx, struct client_async_data);
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
- async_data->fail_callback = fail_callback;
- async_data->callback_data = callback_data;
+ async_data->fail_callback = takeover_run_fail_callback;
+ async_data->callback_data = takeover_data;
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
ctdb_client_async_add(async_data, state);
}
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
- DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
- talloc_free(tmp_ctx);
- return -1;
+ DEBUG(DEBUG_ERR,
+ ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
+ goto fail;
}
ipreallocated:
* IPs have moved. Once upon a time this event only used to
* update natgw.
*/
- retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
- CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
- iprealloc_data.retry_nodes = retry_data;
- iprealloc_data.retry_count = 0;
- iprealloc_data.fail_callback = fail_callback;
- iprealloc_data.fail_callback_data = callback_data;
- iprealloc_data.nodemap = nodemap;
-
nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
nodes, 0, TAKEOVER_TIMEOUT(),
false, tdb_null,
- NULL, iprealloc_fail_callback,
- &iprealloc_data);
+ NULL, takeover_run_fail_callback,
+ takeover_data);
if (ret != 0) {
- /* If the control failed then we should retry to any
- * nodes flagged by iprealloc_fail_callback using the
- * EVENTSCRIPT control. This is a best-effort at
- * backward compatiblity when running a mixed cluster
- * where some nodes have not yet been upgraded to
- * support the IPREALLOCATED control.
- */
- DEBUG(DEBUG_WARNING,
- ("Retry ipreallocated to some nodes using eventscript control\n"));
-
- nodes = talloc_array(tmp_ctx, uint32_t,
- iprealloc_data.retry_count);
- CTDB_NO_MEMORY_FATAL(ctdb, nodes);
-
- j = 0;
- for (i=0; i<nodemap->num; i++) {
- if (iprealloc_data.retry_nodes[i]) {
- nodes[j] = i;
- j++;
- }
- }
-
- data.dptr = discard_const("ipreallocated");
- data.dsize = strlen((char *)data.dptr) + 1;
- ret = ctdb_client_async_control(ctdb,
- CTDB_CONTROL_RUN_EVENTSCRIPTS,
- nodes, 0, TAKEOVER_TIMEOUT(),
- false, data,
- NULL, fail_callback,
- callback_data);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
- }
+ DEBUG(DEBUG_ERR,
+ ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
+ goto fail;
}
talloc_free(tmp_ctx);
return ret;
+
+fail:
+ takeover_run_process_failures(ctdb, takeover_data);
+ talloc_free(tmp_ctx);
+ return -1;
}
}
-/*
- called by a daemon to inform us of a TCP connection that one of its
- clients managing that should tickled with an ACK when IP takeover is
- done
- */
-static void ctdb_remove_connection(struct ctdb_context *ctdb, struct ctdb_connection *conn)
+static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
{
struct ctdb_connection *tcpp;
- struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst);
if (vnn == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
- ctdb_addr_to_str(&conn->dst)));
return;
}
/* if the array is empty we cant remove it
- and we dont need to do anything
+ and we don't need to do anything
*/
if (vnn->tcp_array == NULL) {
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
/* See if we know this connection
- if we dont know this connection then we dont need to do anything
+ if we don't know this connection then we don't need to do anything
*/
tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
if (tcpp == NULL) {
*/
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
{
+ struct ctdb_vnn *vnn;
struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
/* If we don't have public IPs, tickles are useless */
return 0;
}
- ctdb_remove_connection(ctdb, conn);
+ vnn = find_public_ip_vnn(ctdb, &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ return 0;
+ }
+
+ ctdb_remove_connection(vnn, conn);
return 0;
}
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
+ struct ctdb_vnn *vnn;
struct ctdb_tcp_list *tcp = client->tcp_list;
+ struct ctdb_connection *conn = &tcp->connection;
+
DLIST_REMOVE(client->tcp_list, tcp);
- ctdb_remove_connection(client->ctdb, &tcp->connection);
+
+ vnn = find_public_ip_vnn(client->ctdb,
+ &conn->dst);
+ if (vnn == NULL) {
+ DEBUG(DEBUG_ERR,
+ (__location__ " unable to find public address %s\n",
+ ctdb_addr_to_str(&conn->dst)));
+ continue;
+ }
+
+ /* If the IP address is hosted on this node then
+ * remove the connection. */
+ if (vnn->pnn == client->ctdb->pnn) {
+ ctdb_remove_connection(vnn, conn);
+ }
+
+ /* Otherwise this function has been called because the
+ * server IP address has been released to another node
+ * and the client has exited. This means that we
+ * should not delete the connection information. The
+ * takeover node processes connections too. */
}
}
{
struct ctdb_vnn *vnn;
int count = 0;
+ TDB_DATA data;
if (ctdb->tunable.disable_ip_failover == 1) {
return;
ctdb_vnn_iface_string(vnn),
ctdb_addr_to_str(&vnn->public_address),
vnn->public_netmask_bits);
- release_kill_clients(ctdb, &vnn->public_address);
+
+ data.dptr = (uint8_t *)talloc_strdup(
+ vnn, ctdb_addr_to_str(&vnn->public_address));
+ if (data.dptr != NULL) {
+ data.dsize = strlen((char *)data.dptr) + 1;
+ ctdb_daemon_send_message(ctdb, ctdb->pnn,
+ CTDB_SRVID_RELEASE_IP, data);
+ talloc_free(data.dptr);
+ }
+
ctdb_vnn_unassign_iface(ctdb, vnn);
vnn->update_in_flight = false;
count++;
{
int i, num, len;
ctdb_sock_addr *addr;
- struct ctdb_control_public_ip_info *info;
+ struct ctdb_public_ip_info_old *info;
struct ctdb_vnn *vnn;
addr = (ctdb_sock_addr *)indata.dptr;
vnn = find_public_ip_vnn(ctdb, addr);
- if (vnn == NULL) {
- /* if it is not a public ip it could be our 'single ip' */
- if (ctdb->single_ip_vnn) {
- if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
- vnn = ctdb->single_ip_vnn;
- }
- }
- }
if (vnn == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
"'%s'not a public address\n",
num++;
}
- len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
- num*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+ num*sizeof(struct ctdb_iface);
info = talloc_zero_size(outdata, len);
CTDB_NO_MEMORY(ctdb, info);
if (vnn->iface == cur) {
info->active_idx = i;
}
- strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
+ strncpy(info->ifaces[i].name, cur->name,
+ sizeof(info->ifaces[i].name));
+ info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
info->ifaces[i].link_state = cur->link_up;
info->ifaces[i].references = cur->references;
}
info->num = i;
- len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
- i*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
+ i*sizeof(struct ctdb_iface);
outdata->dsize = len;
outdata->dptr = (uint8_t *)info;
TDB_DATA *outdata)
{
int i, num, len;
- struct ctdb_control_get_ifaces *ifaces;
+ struct ctdb_iface_list_old *ifaces;
struct ctdb_interface *cur;
/* count how many public ip structures we have */
num++;
}
- len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
- num*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_iface_list_old, ifaces) +
+ num*sizeof(struct ctdb_iface);
ifaces = talloc_zero_size(outdata, len);
CTDB_NO_MEMORY(ctdb, ifaces);
i = 0;
for (cur=ctdb->ifaces;cur;cur=cur->next) {
- strcpy(ifaces->ifaces[i].name, cur->name);
+ strncpy(ifaces->ifaces[i].name, cur->name,
+ sizeof(ifaces->ifaces[i].name));
+ ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
ifaces->ifaces[i].link_state = cur->link_up;
ifaces->ifaces[i].references = cur->references;
i++;
}
ifaces->num = i;
- len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
- i*sizeof(struct ctdb_control_iface_info);
+ len = offsetof(struct ctdb_iface_list_old, ifaces) +
+ i*sizeof(struct ctdb_iface);
outdata->dsize = len;
outdata->dptr = (uint8_t *)ifaces;
struct ctdb_req_control_old *c,
TDB_DATA indata)
{
- struct ctdb_control_iface_info *info;
+ struct ctdb_iface *info;
struct ctdb_interface *iface;
bool link_up = false;
- info = (struct ctdb_control_iface_info *)indata.dptr;
+ info = (struct ctdb_iface *)indata.dptr;
if (info->name[CTDB_IFACE_SIZE] != '\0') {
int len = strnlen(info->name, CTDB_IFACE_SIZE);
}
-/*
- structure containing the listening socket and the list of tcp connections
- that the ctdb daemon is to kill
-
- One instance is allocated per vnn by ctdb_killtcp_add_connection() the
- first time a connection on that vnn is queued, and it is referenced
- from vnn->killtcp.
-*/
-struct ctdb_kill_tcp {
- struct ctdb_vnn *vnn; /* vnn whose killtcp pointer refers to this structure */
- struct ctdb_context *ctdb; /* context passed to ctdb_killtcp_add_connection() */
- int capture_fd; /* capture socket, -1 until ctdb_sys_open_capture_socket() succeeds */
- struct tevent_fd *fde; /* read event on capture_fd, handled by capture_tcp_handler() */
- trbt_tree_t *connections; /* struct ctdb_killtcp_con entries keyed by killtcp_key() */
- void *private_data; /* filled in by ctdb_sys_open_capture_socket(), passed to ctdb_sys_read_tcp_packet() */
-};
-
-/*
- a tcp connection that is to be killed
-
- Entries are created by ctdb_killtcp_add_connection() as talloc children
- of the owning struct ctdb_kill_tcp and stored in its connections tree.
- */
-struct ctdb_killtcp_con {
- ctdb_sock_addr src_addr; /* canonicalized source address of the connection */
- ctdb_sock_addr dst_addr; /* canonicalized destination address of the connection */
- int count; /* repeat tickles sent so far; tickle_connection_traverse() gives up at 5 */
- struct ctdb_kill_tcp *killtcp; /* owning kill state */
-};
-
-/* this function is used to create a key to represent this socketpair
- in the killtcp tree.
- this key is used to insert and lookup matching socketpairs that are
- to be tickled and RST
-
- The key is KILLTCP_KEYLEN 32-bit words held in a single static buffer,
- so the returned pointer is only valid until the next call. The buffer
- is zeroed first; AF_INET fills words 0-3 (dst addr, src addr, dst port,
- src port) and AF_INET6 fills all ten words (the four address words of
- dst and src interleaved from last to first, then dst port, src port).
- If the two address families differ, or the family is neither AF_INET
- nor AF_INET6, an error is logged and the all-zero key is returned.
-*/
-#define KILLTCP_KEYLEN 10
-static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
-{
- static uint32_t key[KILLTCP_KEYLEN];
-
- bzero(key, sizeof(key));
-
- if (src->sa.sa_family != dst->sa.sa_family) {
- DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
- return key;
- }
-
- switch (src->sa.sa_family) {
- case AF_INET:
- key[0] = dst->ip.sin_addr.s_addr;
- key[1] = src->ip.sin_addr.s_addr;
- key[2] = dst->ip.sin_port;
- key[3] = src->ip.sin_port;
- break;
- case AF_INET6: {
- uint32_t *dst6_addr32 =
- (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
- uint32_t *src6_addr32 =
- (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
- key[0] = dst6_addr32[3];
- key[1] = src6_addr32[3];
- key[2] = dst6_addr32[2];
- key[3] = src6_addr32[2];
- key[4] = dst6_addr32[1];
- key[5] = src6_addr32[1];
- key[6] = dst6_addr32[0];
- key[7] = src6_addr32[0];
- key[8] = dst->ip6.sin6_port;
- key[9] = src->ip6.sin6_port;
- break;
- }
- default:
- DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
- return key;
- }
-
- return key;
-}
-
-/*
- called when we get a read event on the raw socket
-
- private_data is the struct ctdb_kill_tcp registered with tevent_add_fd().
- A packet is read from the capture socket and looked up in the
- connections tree. Entries are inserted with
- killtcp_key(&con->dst_addr, &con->src_addr) but looked up with
- killtcp_key(&src, &dst), so a captured packet matches an entry when it
- travels in the reverse direction of the stored connection.
- On a match a TCP packet carrying the captured ack_seq/seq is sent and
- the entry is freed; anything else is ignored.
- */
-static void capture_tcp_handler(struct tevent_context *ev,
- struct tevent_fd *fde,
- uint16_t flags, void *private_data)
-{
- struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
- struct ctdb_killtcp_con *con;
- ctdb_sock_addr src, dst;
- uint32_t ack_seq, seq;
-
- if (!(flags & TEVENT_FD_READ)) {
- return;
- }
-
- if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
- killtcp->private_data,
- &src, &dst,
- &ack_seq, &seq) != 0) {
- /* probably a non-tcp ACK packet */
- return;
- }
-
- /* check if we have this guy in our list of connections
- to kill
- */
- con = trbt_lookuparray32(killtcp->connections,
- KILLTCP_KEYLEN, killtcp_key(&src, &dst));
- if (con == NULL) {
- /* no this was some other packet we can just ignore */
- return;
- }
-
- /* This one has been tickled !
- now reset him and remove him from the list.
-
- NOTE(review): the log format below prints only the destination
- port before "->"; the destination address itself is not printed,
- and both ports are read through the .ip (IPv4) member.
- */
- DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
- ntohs(con->dst_addr.ip.sin_port),
- ctdb_addr_to_str(&con->src_addr),
- ntohs(con->src_addr.ip.sin_port)));
-
- ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1); /* final argument presumably requests RST - confirm against ctdb_sys_send_tcp() */
- talloc_free(con); /* presumably also drops the tree entry - TODO confirm in rb_tree */
-}
-
-
-/* when traversing the list of all tcp connections to send tickle acks to
- (so that we can capture the ack coming back and kill the connection
- by a RST)
- this callback is called for each connection we are currently trying to kill
-
- param is the temporary talloc context created by
- ctdb_tickle_sentenced_connections(); data is the struct ctdb_killtcp_con
- stored in the tree. Always returns 0.
-*/
-static int tickle_connection_traverse(void *param, void *data)
-{
- struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
-
- /* have tried too many times, just give up */
- if (con->count >= 5) {
- /* can't delete in traverse: reparent to delete_cons */
- talloc_steal(param, con);
- return 0;
- }
-
- /* otherwise, try tickling it again */
- con->count++;
- ctdb_sys_send_tcp(
- (ctdb_sock_addr *)&con->dst_addr,
- (ctdb_sock_addr *)&con->src_addr,
- 0, 0, 0);
- return 0;
-}
-
-
-/*
- called every second until all sentenced connections have been reset
-
- private_data is the struct ctdb_kill_tcp. Each run walks the
- connections tree with tickle_connection_traverse(), frees the entries
- that callback gave up on, and then either frees the whole killtcp
- structure (tree missing or empty) or re-arms itself one second later.
- */
-static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
- struct tevent_timer *te,
- struct timeval t, void *private_data)
-{
- struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
- void *delete_cons = talloc_new(NULL); /* temporary parent for connections that have given up */
-
- /* loop over all connections sending tickle ACKs */
- trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
-
- /* now we've finished traverse, it's safe to do deletion. */
- talloc_free(delete_cons);
-
- /* If there are no more connections to kill we can remove the
- entire killtcp structure
- */
- if ( (killtcp->connections == NULL) ||
- (killtcp->connections->root == NULL) ) {
- talloc_free(killtcp);
- return;
- }
-
- /* try tickling them again in a seconds time
- */
- tevent_add_timer(killtcp->ctdb->ev, killtcp,
- timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
-}
-
-/*
- destroy the killtcp structure
-
- talloc destructor installed by ctdb_killtcp_add_connection(). It clears
- the vnn's back pointer (vnn->killtcp), but only if the vnn is still on
- the ctdb->vnn list and still points at this structure. Always returns 0.
- */
-static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
-{
- struct ctdb_vnn *tmpvnn;
-
- /* verify that this vnn is still active */
- for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
- if (tmpvnn == killtcp->vnn) {
- break;
- }
- }
-
- if (tmpvnn == NULL) { /* vnn is no longer on the list: do not touch it */
- return 0;
- }
-
- if (killtcp->vnn->killtcp != killtcp) { /* vnn refers to some other killtcp: leave it alone */
- return 0;
- }
-
- killtcp->vnn->killtcp = NULL;
-
- return 0;
-}
-
-
-/* nothing fancy here, just unconditionally replace any existing
- connection structure with the new one.
-
- don't even free the old one if it did exist, that one is talloc_stolen
- by the same node in the tree anyway and will be deleted when the new data
- is deleted
-
- parm is the new connection passed to trbt_insertarray32_callback();
- data is presumably the entry already stored under the key, if any
- (TODO confirm against rb_tree). The new connection is returned
- unchanged.
-*/
-static void *add_killtcp_callback(void *parm, void *data)
-{
- return parm;
-}
-
-/*
- add a tcp socket to the list of connections we want to RST
-
- s and d are the source and destination of the connection; both are
- canonicalized before use. The owning vnn is found by looking up d,
- then s, as a public address, and finally by comparing d against the
- "single IP" vnn. The per-vnn kill state, the capture socket and the
- fd/timer events are created on first use, the connection is stored in
- the tree and one tickle is sent immediately.
-
- Returns 0 on success, and -1 if no vnn matches or the capture socket
- cannot be opened; in the latter case the vnn's whole kill state is
- freed. Allocation failures are handled by CTDB_NO_MEMORY().
- */
-static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
- ctdb_sock_addr *s,
- ctdb_sock_addr *d)
-{
- ctdb_sock_addr src, dst;
- struct ctdb_kill_tcp *killtcp;
- struct ctdb_killtcp_con *con;
- struct ctdb_vnn *vnn;
-
- ctdb_canonicalize_ip(s, &src);
- ctdb_canonicalize_ip(d, &dst);
-
- vnn = find_public_ip_vnn(ctdb, &dst);
- if (vnn == NULL) {
- vnn = find_public_ip_vnn(ctdb, &src);
- }
- if (vnn == NULL) {
- /* if it is not a public ip it could be our 'single ip' */
- if (ctdb->single_ip_vnn) {
- if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
- vnn = ctdb->single_ip_vnn;
- }
- }
- }
- if (vnn == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
- return -1;
- }
-
- killtcp = vnn->killtcp;
-
- /* If this is the first connection to kill we must allocate
- a new structure
- */
- if (killtcp == NULL) {
- killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
- CTDB_NO_MEMORY(ctdb, killtcp);
-
- killtcp->vnn = vnn;
- killtcp->ctdb = ctdb;
- killtcp->capture_fd = -1;
- killtcp->connections = trbt_create(killtcp, 0); /* NOTE(review): result is not checked for NULL here */
-
- vnn->killtcp = killtcp;
- talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
- }
-
-
-
- /* create a structure that describes this connection we want to
- RST and store it in killtcp->connections
- */
- con = talloc(killtcp, struct ctdb_killtcp_con);
- CTDB_NO_MEMORY(ctdb, con);
- con->src_addr = src;
- con->dst_addr = dst;
- con->count = 0;
- con->killtcp = killtcp;
-
-
- trbt_insertarray32_callback(killtcp->connections,
- KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
- add_killtcp_callback, con);
-
- /*
- If we don't have a socket to listen on yet we must create it
- */
- if (killtcp->capture_fd == -1) {
- const char *iface = ctdb_vnn_iface_string(vnn);
- killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
- if (killtcp->capture_fd == -1) {
- DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
- "socket on iface '%s' for killtcp (%s)\n",
- iface, strerror(errno)));
- goto failed;
- }
- }
-
-
- if (killtcp->fde == NULL) {
- killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
- killtcp->capture_fd,
- TEVENT_FD_READ,
- capture_tcp_handler, killtcp);
- tevent_fd_set_auto_close(killtcp->fde); /* NOTE(review): fde is not checked for NULL first */
-
- /* We also need to set up some events to tickle all these connections
- until they are all reset
- */
- tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
- ctdb_tickle_sentenced_connections, killtcp);
- }
-
- /* tickle him once now */
- ctdb_sys_send_tcp(
- &con->dst_addr,
- &con->src_addr,
- 0, 0, 0);
-
- return 0;
-
-failed:
- talloc_free(vnn->killtcp); /* con is a talloc child of killtcp, so it goes too */
- vnn->killtcp = NULL;
- return -1;
-}
-
-/*
- kill a TCP connection.
-
- Control handler: indata.dptr is interpreted as a struct ctdb_connection
- and its src/dst pair is handed to ctdb_killtcp_add_connection(), whose
- result is returned.
- NOTE(review): indata.dsize is not checked in this function - confirm
- that the caller validates the size.
- */
-int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
-{
- struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
-
- return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
-}
-
/*
called by a daemon to inform us of the entire list of TCP tickles for
a particular public address.
*/
int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
+ struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
struct ctdb_tcp_array *tcparray;
struct ctdb_vnn *vnn;
/* We must at least have the num field or else we can't verify the size
of the received data blob
*/
- if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)) {
- DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
+ if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
+ DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
return -1;
}
/* verify that the size of data matches what we expect */
- if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)
- + sizeof(struct ctdb_connection)
- * list->tickles.num) {
- DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
+ if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
+ + sizeof(struct ctdb_connection) * list->num) {
+ DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
return -1;
}
return 1;
}
+ if (vnn->pnn == ctdb->pnn) {
+ DEBUG(DEBUG_INFO,
+ ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
+ ctdb_addr_to_str(&list->addr)));
+ return 0;
+ }
+
/* remove any old ticklelist we might have */
talloc_free(vnn->tcp_array);
vnn->tcp_array = NULL;
tcparray = talloc(vnn, struct ctdb_tcp_array);
CTDB_NO_MEMORY(ctdb, tcparray);
- tcparray->num = list->tickles.num;
+ tcparray->num = list->num;
tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
CTDB_NO_MEMORY(ctdb, tcparray->connections);
- memcpy(tcparray->connections, &list->tickles.connections[0],
+ memcpy(tcparray->connections, &list->connections[0],
sizeof(struct ctdb_connection)*tcparray->num);
/* We now have a new fresh tickle list array for this vnn */
int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
- struct ctdb_control_tcp_tickle_list *list;
+ struct ctdb_tickle_list_old *list;
struct ctdb_tcp_array *tcparray;
- int num;
+ int num, i;
struct ctdb_vnn *vnn;
+ unsigned port;
vnn = find_public_ip_vnn(ctdb, addr);
if (vnn == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
+ DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
ctdb_addr_to_str(addr)));
return 1;
}
+ port = ctdb_addr_to_port(addr);
+
tcparray = vnn->tcp_array;
- if (tcparray) {
- num = tcparray->num;
- } else {
- num = 0;
+ num = 0;
+ if (tcparray != NULL) {
+ if (port == 0) {
+ /* All connections */
+ num = tcparray->num;
+ } else {
+ /* Count connections for port */
+ for (i = 0; i < tcparray->num; i++) {
+ if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
+ num++;
+ }
+ }
+ }
}
- outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections)
+ outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
+ sizeof(struct ctdb_connection) * num;
outdata->dptr = talloc_size(outdata, outdata->dsize);
CTDB_NO_MEMORY(ctdb, outdata->dptr);
- list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
+ list = (struct ctdb_tickle_list_old *)outdata->dptr;
list->addr = *addr;
- list->tickles.num = num;
- if (num) {
- memcpy(&list->tickles.connections[0], tcparray->connections,
- sizeof(struct ctdb_connection) * num);
+ list->num = num;
+
+ if (num == 0) {
+ return 0;
+ }
+
+ num = 0;
+ for (i = 0; i < tcparray->num; i++) {
+ if (port == 0 || \
+ port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
+ list->connections[num] = tcparray->connections[i];
+ num++;
+ }
}
return 0;
{
int ret, num;
TDB_DATA data;
- struct ctdb_control_tcp_tickle_list *list;
+ struct ctdb_tickle_list_old *list;
if (tcparray) {
num = tcparray->num;
num = 0;
}
- data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
- tickles.connections) +
+ data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
sizeof(struct ctdb_connection) * num;
data.dptr = talloc_size(ctdb, data.dsize);
CTDB_NO_MEMORY(ctdb, data.dptr);
- list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
+ list = (struct ctdb_tickle_list_old *)data.dptr;
list->addr = *addr;
- list->tickles.num = num;
+ list->num = num;
if (tcparray) {
- memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
+ memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
}
ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
*/
int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
+ struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
struct control_gratious_arp *arp;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
+ (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_gratious_arp, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ gratious_arp->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
return -1;
}
int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
+ struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
int ret;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
- DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+ DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_ip_iface, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ pub->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
return -1;
}
struct ctdb_req_control_old *c,
TDB_DATA indata, bool *async_reply)
{
- struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
+ struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
struct ctdb_vnn *vnn;
/* verify the size of indata */
- if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
- DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
+ if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
+ DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
return -1;
}
if (indata.dsize !=
- ( offsetof(struct ctdb_control_ip_iface, iface)
+ ( offsetof(struct ctdb_addr_info_old, iface)
+ pub->len ) ){
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
"but should be %u bytes\n",
(unsigned)indata.dsize,
- (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
+ (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
return -1;
}
node has the expected ip allocation.
This is verified against ctdb->ip_tree
*/
-int verify_remote_ip_allocation(struct ctdb_context *ctdb,
- struct ctdb_public_ip_list_old *ips,
- uint32_t pnn)
+static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
+ struct ctdb_public_ip_list *ips,
+ uint32_t pnn)
{
struct public_ip_list *tmp_ip;
int i;
if (ctdb->ip_tree == NULL) {
- /* dont know the expected allocation yet, assume remote node
+ /* don't know the expected allocation yet, assume remote node
is correct. */
return 0;
}
}
for (i=0; i<ips->num; i++) {
- tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
+ tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
if (tmp_ip == NULL) {
- DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
+ DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
return -1;
}
- if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
+ if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
continue;
}
- if (tmp_ip->pnn != ips->ips[i].pnn) {
+ if (tmp_ip->pnn != ips->ip[i].pnn) {
DEBUG(DEBUG_ERR,
("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
pnn,
- ctdb_addr_to_str(&ips->ips[i].addr),
- ips->ips[i].pnn, tmp_ip->pnn));
+ ctdb_addr_to_str(&ips->ip[i].addr),
+ ips->ip[i].pnn, tmp_ip->pnn));
return -1;
}
}
if (vnn == NULL) {
/* Delete IP ips->ips[i] */
- struct ctdb_control_ip_iface *pub;
+ struct ctdb_addr_info_old *pub;
DEBUG(DEBUG_NOTICE,
("IP %s no longer configured, deleting it\n",
ctdb_addr_to_str(&ips->ips[i].addr)));
- pub = talloc_zero(mem_ctx,
- struct ctdb_control_ip_iface);
+ pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
CTDB_NO_MEMORY(ctdb, pub);
pub->addr = ips->ips[i].addr;
timeout = TAKEOVER_TIMEOUT();
- data.dsize = offsetof(struct ctdb_control_ip_iface,
+ data.dsize = offsetof(struct ctdb_addr_info_old,
iface) + pub->len;
data.dptr = (uint8_t *)pub;
}
if (i == ips->num) {
/* Add IP ips->ips[i] */
- struct ctdb_control_ip_iface *pub;
+ struct ctdb_addr_info_old *pub;
const char *ifaces = NULL;
uint32_t len;
int iface = 0;
len = strlen(ifaces) + 1;
pub = talloc_zero_size(mem_ctx,
- offsetof(struct ctdb_control_ip_iface, iface) + len);
+ offsetof(struct ctdb_addr_info_old, iface) + len);
CTDB_NO_MEMORY(ctdb, pub);
pub->addr = vnn->public_address;
timeout = TAKEOVER_TIMEOUT();
- data.dsize = offsetof(struct ctdb_control_ip_iface,
+ data.dsize = offsetof(struct ctdb_addr_info_old,
iface) + pub->len;
data.dptr = (uint8_t *)pub;
close(h->fd[0]);
debug_extra = talloc_asprintf(NULL, "reloadips:");
- ctdb_set_process_name("ctdb_reloadips");
+ prctl_set_comment("ctdb_reloadips");
if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
res = -1;
}
sys_write(h->fd[1], &res, 1);
- /* make sure we die when our parent dies */
- while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
- sleep(5);
- }
+ ctdb_wait_for_process_to_exit(parent);
_exit(0);
}