recoverd: Do not send "ipreallocated" event to stopped nodes
[ctdb.git] / server / ctdb_takeover.c
index 20311724834e71988fbfc104576bb89e9b935ce3..1e8dc757c5dafc0a26f4a59bc51a5ec9f862dff4 100644 (file)
@@ -1701,6 +1701,25 @@ bool lcp2_failback_candidate(struct ctdb_context *ctdb,
        
 }
 
+struct lcp2_imbalance_pnn { /* (imbalance, node) pair so nodes can be sorted by imbalance */
+       uint32_t imbalance; /* copied from lcp2_imbalances[pnn] */
+       int pnn; /* index of the node in the nodemap */
+};
+
+int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
+{
+       const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
+       const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
+       /* qsort() comparator: the larger imbalance sorts first (descending order) */
+       if (lipa->imbalance > lipb->imbalance) {
+               return -1; /* a goes before b */
+       } else if (lipa->imbalance == lipb->imbalance) {
+               return 0; /* equal imbalance: relative order is left to qsort() */
+       } else {
+               return 1; /* b goes before a */
+       }
+}
+
 /* LCP2 algorithm for rebalancing the cluster.  This finds the source
  * node with the highest LCP2 imbalance, and then determines the best
  * IP/destination node combination to move from the source node.
@@ -1714,8 +1733,9 @@ bool lcp2_failback(struct ctdb_context *ctdb,
                   uint32_t *lcp2_imbalances,
                   bool *newly_healthy)
 {
-       int srcnode, i, num_newly_healthy;
-       uint32_t maximbl, b;
+       int i, num_newly_healthy;
+       struct lcp2_imbalance_pnn * lips;
+       bool ret;
 
        /* It is only worth continuing if we have suitable target
         * nodes to transfer IPs to.  This check is much cheaper than
@@ -1731,31 +1751,41 @@ bool lcp2_failback(struct ctdb_context *ctdb,
                return false;
        }
 
-        /* Get the node with the highest imbalance metric. */
-        srcnode = -1;
-        maximbl = 0;
-       for (i=0; i < nodemap->num; i++) {
-               b = lcp2_imbalances[i];
-               if ((srcnode == -1) || (b > maximbl)) {
-                       srcnode = i;
-                       maximbl = b;
-               }
+       /* Put the imbalances and nodes into an array, sort them and
+        * iterate through candidates.  Usually the 1st one will be
+        * used, so this doesn't cost much...
+        */
+       lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
+       for (i = 0; i < nodemap->num; i++) {
+               lips[i].imbalance = lcp2_imbalances[i];
+               lips[i].pnn = i;
        }
+       qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
+             lcp2_cmp_imbalance_pnn);
 
-        /* This means that all nodes had 0 or 1 addresses, so can't be
-        * imbalanced.
-        */
-        if (maximbl == 0) {
-               return false;
+       ret = false;
+       for (i = 0; i < nodemap->num; i++) {
+               /* This means that all nodes had 0 or 1 addresses, so
+                * can't be imbalanced.
+                */
+               if (lips[i].imbalance == 0) {
+                       break;
+               }
+
+               if (lcp2_failback_candidate(ctdb,
+                                           nodemap,
+                                           all_ips,
+                                           lips[i].pnn,
+                                           lips[i].imbalance,
+                                           lcp2_imbalances,
+                                           newly_healthy)) {
+                       ret = true;
+                       break;
+               }
        }
 
-       return lcp2_failback_candidate(ctdb,
-                                      nodemap,
-                                      all_ips,
-                                      srcnode,
-                                      maximbl,
-                                      lcp2_imbalances,
-                                      newly_healthy);
+       talloc_free(lips);
+       return ret;
 }
 
 /* The calculation part of the IP allocation algorithm.
@@ -1903,7 +1933,8 @@ finished:
 /*
   make any IP alias changes for public addresses that are necessary 
  */
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
+int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
+                     client_async_callback fail_callback, void *callback_data)
 {
        int i;
        struct ctdb_public_ip ip;
@@ -1935,6 +1966,9 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
 
+       async_data->fail_callback = fail_callback;
+       async_data->callback_data = callback_data;
+
        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
                if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
@@ -1992,6 +2026,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
        /* tell all nodes to get their own IPs */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
+
+       async_data->fail_callback = fail_callback;
+       async_data->callback_data = callback_data;
+
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        /* this IP won't be taken over */
@@ -2036,16 +2074,23 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
        }
 
 ipreallocated:
-       /* tell all nodes to update natwg */
-       /* send the flags update natgw on all connected nodes */
+       /*
+        * Tell all connected, but not stopped (since they are in
+        * recovery and will reject the event), nodes to run
+        * eventscripts to process the "ipreallocated" event.  This
+        * can do a lot of things, including restarting services to
+        * reconfigure them if public IPs have moved.  Once upon a
+        * time this event was only used to update natgw.
+        */
        data.dptr  = discard_const("ipreallocated");
        data.dsize = strlen((char *)data.dptr) + 1; 
-       nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+       nodes = list_of_nodes(ctdb, nodemap, tmp_ctx,
+                             NODE_FLAGS_DISCONNECTED|NODE_FLAGS_STOPPED, -1);
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
                                      nodes, 0, TAKEOVER_TIMEOUT(),
                                      false, data,
-                                     NULL, NULL,
-                                     NULL) != 0) {
+                                     NULL, fail_callback,
+                                     callback_data) != 0) {
                DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
        }
 
@@ -2850,9 +2895,25 @@ static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct t
  */
 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
 {
-       if (killtcp->vnn) {
-               killtcp->vnn->killtcp = NULL;
+       struct ctdb_vnn *tmpvnn;
+
+       /* verify that this vnn is still on ctdb's list of vnns before touching it */
+       for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
+               if (tmpvnn == killtcp->vnn) {
+                       break;
+               }
+       }
+
+       if (tmpvnn == NULL) {
+               return 0; /* vnn not found on the list (or NULL): nothing to clear */
        }
+
+       if (killtcp->vnn->killtcp != killtcp) {
+               return 0; /* vnn's killtcp is not this one: leave it alone */
+       }
+
+       killtcp->vnn->killtcp = NULL; /* clear the vnn's pointer to this killtcp */
+
        return 0;
 }
 
@@ -2907,7 +2968,7 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
           a new structure
         */
        if (killtcp == NULL) {
-               killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
+               killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
                CTDB_NO_MEMORY(ctdb, killtcp);
 
                killtcp->vnn         = vnn;