ZERO_STRUCT(ip);
- /* Work out which node will look after each public IP.
- * takeover_node cycles over the nodes and is incremented each time a
- * node has been assigned to take over for another node.
- * This spreads the failed nodes out across the remaining
- * nodes more evenly
- */
+ /* Count how many completely healthy nodes we have */
+ num_healthy = 0;
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
- ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn;
- } else {
- uint32_t takeover_vnn;
+ num_healthy++;
+ }
+ }
- /* If this public address has already been taken over
- by a node and that node is still healthy, then
- leave the public address at that node.
- */
- takeover_vnn = ctdb->nodes[i]->takeover_vnn;
- if ( ctdb_validate_vnn(ctdb, takeover_vnn)
- && (!(nodemap->nodes[takeover_vnn].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) ) {
- continue;
+ if (num_healthy > 0) {
+ /* We have healthy nodes, so only consider them for
+ serving public addresses
+ */
+ mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
+ } else {
+ /* We didn't have any completely healthy nodes, so
+ use "disabled" nodes as a fallback
+ */
+ mask = NODE_FLAGS_INACTIVE;
+ }
+
+ /* since nodes only know about those public addresses that
+ can be served by that particular node, no single node has
+ a full list of all public addresses that exist in the cluster.
+ Walk over all node structures and create a merged list of
+ all public addresses that exist in the cluster.
+ */
+ all_ips = create_merged_ip_list(ctdb, tmp_ctx);
+
+
+ /* mark all public addresses with a masked node as being served by
+ node -1
+ */
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ continue;
+ }
+ if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
+ tmp_ip->pnn = -1;
+ }
+ }
+
+
+ /* now we must redistribute all public addresses with takeover node
+ -1 among the nodes available
+ */
+ retries = 0;
+ try_again:
+ /* loop over all ip's and find a physical node to cover for
+ each unassigned ip.
+ */
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
+ DEBUG(0,("Failed to find node to cover ip %s\n", inet_ntoa(tmp_ip->sin.sin_addr)));
}
+ }
+ }
+
+
+ /* now, try to make sure the ip addresses are evenly distributed
+ across the nodes.
+ for each ip address, loop over all nodes that can serve this
+ ip and make sure that the difference between the node
+ serving the most and the node serving the fewest ips is not greater
+ than 1.
+ */
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ continue;
+ }
+ /* Get the highest and lowest number of ips served by any
+ valid node which can serve this ip.
+ */
+ maxnode = -1;
+ minnode = -1;
+ for (i=0;i<nodemap->num;i++) {
+ if (nodemap->nodes[i].flags & mask) {
+ continue;
+ }
- ctdb->nodes[i]->takeover_vnn = (uint32_t)-1;
+ /* only check nodes that can actually serve this ip */
+ if (can_node_serve_ip(ctdb, i, tmp_ip)) {
+ /* non-zero means this node can't serve the ip, so skip to the next node */
+ continue;
+ }
- ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED);
-
- /* if no enabled node can take it, then we
- might as well use any active node. It
- probably means that some subsystem (such as
- NFS) is sick on all nodes. Best we can do
- is to keep the other services up. */
- if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) {
- ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE);
+ num = node_ip_coverage(ctdb, i, all_ips);
+ if (maxnode == -1) {
+ maxnode = i;
+ maxnum = num;
+ } else {
+ if (num > maxnum) {
+ maxnode = i;
+ maxnum = num;
+ }
}
- DEBUG(0,(__location__ " Could not find maxnode. May not be able to server ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr)));
+ if (minnode == -1) {
+ minnode = i;
+ minnum = num;
+ } else {
+ if (num < minnum) {
+ minnode = i;
+ minnum = num;
+ }
+ }
+ }
+ if (maxnode == -1) {
++ DEBUG(0,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr)));
+ continue;
+ }
+
+ /* if the spread between the smallest and largest coverage by
+ a node is >=2, we steal one of the ips from the node with the
+ most coverage to even things out a bit.
+ try to do this at most 5 times, since we don't want to spend
+ too much time balancing the ip coverage.
+ */
+ if ( (maxnum > minnum+1)
+ && (retries < 5) ){
+ struct ctdb_public_ip_list *tmp;
- if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) {
- DEBUG(0,(__location__ " No node available on same network to take %s\n",
- ctdb->nodes[i]->public_address));
+ /* mark one of maxnode's ips as unassigned and try
+ again
+ */
+ for (tmp=all_ips;tmp;tmp=tmp->next) {
+ if (tmp->pnn == maxnode) {
+ tmp->pnn = -1;
+ retries++;
+ goto try_again;
+ }
}
}
- }
+ }
+
- /* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */
+
+ /* at this point ->pnn is the node which will own each IP
+ or -1 if there is no node that can cover this ip
+ */
/* now tell all nodes to delete any alias that they should not
have. This will be a NOOP on nodes that don't currently