	/* We know whether we have any unassigned addresses, so we might
	 * as well optimise.
	 */
+ if (have_unassigned) {
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
+ ctdb_addr_to_str(&tmp_ip->addr)));
+ }
+ }
+ }
+}
+
+/* LCP2 algorithm for rebalancing the cluster. This finds the source
+ * node with the highest LCP2 imbalance, and then determines the best
+ * IP/destination node combination to move from the source node.
+ *
+ * Not static, so we can easily link it into a unit test.
+ */
+bool lcp2_failback(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ uint32_t mask,
+ struct ctdb_public_ip_list *all_ips,
+ uint32_t *lcp2_imbalances,
+ bool *newly_healthy)
+{
+ int srcnode, dstnode, mindstnode, i, num_newly_healthy;
+ uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
+ uint32_t minsrcimbl, mindstimbl, b;
+ struct ctdb_public_ip_list *minip;
+ struct ctdb_public_ip_list *tmp_ip;
+
+ /* It is only worth continuing if we have suitable target
+ * nodes to transfer IPs to. This check is much cheaper than
+ * continuing on...
+ */
+ num_newly_healthy = 0;
+ for (i = 0; i < nodemap->num; i++) {
+ if (newly_healthy[i]) {
+ num_newly_healthy++;
+ }
+ }
+ if (num_newly_healthy == 0) {
+ return false;
+ }
+
+ /* Get the node with the highest imbalance metric. */
+ srcnode = -1;
+ maximbl = 0;
+ for (i=0; i < nodemap->num; i++) {
+ b = lcp2_imbalances[i];
+ if ((srcnode == -1) || (b > maximbl)) {
+ srcnode = i;
+ maximbl = b;
+ }
+ }
+
+ /* This means that all nodes had 0 or 1 addresses, so can't be
+ * imbalanced.
+ */
+ if (maximbl == 0) {
+ return false;
+ }
+
+ /* Find an IP and destination node that best reduces imbalance. */
+ minip = NULL;
+ minsrcimbl = 0;
+ mindstnode = -1;
+ mindstimbl = 0;
+
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+ DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));
+
+ for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
+ /* Only consider addresses on srcnode. */
+ if (tmp_ip->pnn != srcnode) {
+ continue;
+ }
+
+ /* What is this IP address costing the source node? */
+ srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
+ srcimbl = maximbl - srcdsum;
+
+ /* Consider this IP address would cost each potential
+ * destination node. Destination nodes are limited to
+ * those that are newly healthy, since we don't want
+ * to do gratuitous failover of IPs just to make minor
+ * balance improvements.
+ */
+ for (dstnode=0; dstnode < nodemap->num; dstnode++) {
+ if (! newly_healthy[dstnode]) {
+ continue;
+ }
+ /* only check nodes that can actually serve this ip */
+ if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
+ /* no it couldnt so skip to the next node */
+ continue;
+ }
+
+ dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
+ dstimbl = lcp2_imbalances[dstnode] + dstdsum;
+ DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
+ srcnode, srcimbl - lcp2_imbalances[srcnode],
+ ctdb_addr_to_str(&(tmp_ip->addr)),
+ dstnode, dstimbl - lcp2_imbalances[dstnode]));
+
+ if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \
+ ((mindstnode == -1) || \
+ ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
+
+ minip = tmp_ip;
+ minsrcimbl = srcimbl;
+ mindstnode = dstnode;
+ mindstimbl = dstimbl;
+ }
+ }
+ }
+ DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
+
+ if (mindstnode != -1) {
+ /* We found a move that makes things better... */
+ DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
+ srcnode, minsrcimbl - lcp2_imbalances[srcnode],
+ ctdb_addr_to_str(&(minip->addr)),
+ mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
+
+
+ lcp2_imbalances[srcnode] = srcimbl;
+ lcp2_imbalances[mindstnode] = mindstimbl;
+ minip->pnn = mindstnode;
+
+ return true;
+ }
+
+ return false;
+
+}
+
+/* The calculation part of the IP allocation algorithm.
+ * Not static, so we can easily link it into a unit test.
+ */
+void ctdb_takeover_run_core(struct ctdb_context *ctdb,
+ struct ctdb_node_map *nodemap,
+ struct ctdb_public_ip_list **all_ips_p)
+{
+ int i, num_healthy, retries, num_ips;
+ uint32_t mask;
+ struct ctdb_public_ip_list *all_ips, *tmp_ip;
+ uint32_t *lcp2_imbalances;
+ bool *newly_healthy;
+
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+
+ /* Count how many completely healthy nodes we have */
+ num_healthy = 0;
+ for (i=0;i<nodemap->num;i++) {
+ if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
+ num_healthy++;
+ }
+ }
+
+ if (num_healthy > 0) {
+ /* We have healthy nodes, so only consider them for
+ serving public addresses
+ */
+ mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
+ } else {
+ /* We didnt have any completely healthy nodes so
+ use "disabled" nodes as a fallback
+ */
+ mask = NODE_FLAGS_INACTIVE;
+ }
+
+ /* since nodes only know about those public addresses that
+ can be served by that particular node, no single node has
+ a full list of all public addresses that exist in the cluster.
+ Walk over all node structures and create a merged list of
+ all public addresses that exist in the cluster.
+
+ keep the tree of ips around as ctdb->ip_tree
+ */
+ all_ips = create_merged_ip_list(ctdb);
+ *all_ips_p = all_ips; /* minimal code changes */
+
+ /* Count how many ips we have */
+ num_ips = 0;
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ num_ips++;
+ }
+
+ /* If we want deterministic ip allocations, i.e. that the ip addresses
+ will always be allocated the same way for a specific set of
+ available/unavailable nodes.
+ */
+ if (1 == ctdb->tunable.deterministic_public_ips) {
+ DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
+ for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
+ tmp_ip->pnn = i%nodemap->num;
+ }
+ }
+
+
+ /* mark all public addresses with a masked node as being served by
+ node -1
+ */
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ continue;
+ }
+ if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
+ tmp_ip->pnn = -1;
+ }
+ }
+
+ /* verify that the assigned nodes can serve that public ip
+ and set it to -1 if not
+ */
+ for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
+ if (tmp_ip->pnn == -1) {
+ continue;
+ }
+ if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
+ /* this node can not serve this ip. */
+ tmp_ip->pnn = -1;
+ }
+ }
+
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
+ lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
+ }
+
+ /* now we must redistribute all public addresses with takeover node
+ -1 among the nodes available
+ */
+ retries = 0;
+try_again:
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
+ lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
+ } else {
+ basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
+ }
+
	/* If we don't want IPs to fail back after a node becomes healthy
	   again, we won't even try to reallocate the IP addresses so that
	   they are evenly spread out.
	   This can NOT be used at the same time as DeterministicIPs !
	*/
+ if (1 == ctdb->tunable.no_ip_failback) {
+ if (1 == ctdb->tunable.deterministic_public_ips) {
+ DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
+ }
+ goto finished;
+ }
+
+
+ /* now, try to make sure the ip adresses are evenly distributed
+ across the node.
+ */
+ if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
+ if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
+ goto try_again;
+ }
+ } else {
+ if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
+ goto try_again;
+ }
+ }