ctdb-takeover: Drop unused ctdb_takeover_run() and related code
author Martin Schwenke <martin@meltin.net>
Fri, 9 Dec 2016 08:19:49 +0000 (19:19 +1100)
committer Amitay Isaacs <amitay@samba.org>
Mon, 19 Dec 2016 03:07:08 +0000 (04:07 +0100)
Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
ctdb/include/ctdb_private.h
ctdb/server/ctdb_takeover.c

index 9716c7cdde3200023731e802f9a39ccb0ff7c4d4..d81ed56d76306954d5d1ee4ad41892f5f98499c6 100644 (file)
@@ -887,9 +887,6 @@ int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
 
 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses);
 
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
-                     uint32_t *force_rebalance_nodes);
-
 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
                                TDB_DATA indata);
 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata,
index 19b7f07b45e3e0a7dec2c1b192c27b6d592ed0b5..513da63cd3823fef85aa0cae517e4a4ad0c723f3 100644 (file)
@@ -1180,458 +1180,6 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
        return 0;
 }
 
-static struct ctdb_public_ip_list *
-ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
-                            TALLOC_CTX *mem_ctx,
-                            struct ctdb_node_map_old *nodemap,
-                            uint32_t public_ip_flags)
-{
-       int j, ret;
-       struct ctdb_public_ip_list_old *ip_list;
-       struct ctdb_public_ip_list *public_ips;
-
-       public_ips = talloc_zero_array(mem_ctx,
-                                      struct ctdb_public_ip_list,
-                                      nodemap->num);
-       if (public_ips == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-
-       for (j = 0; j < nodemap->num; j++) {
-               if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
-                       continue;
-               }
-
-               /* Retrieve the list of public IPs from the
-                * node. Flags says whether it is known or
-                * available. */
-               ret = ctdb_ctrl_get_public_ips_flags(
-                       ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
-                       public_ip_flags, &ip_list);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR,
-                             ("Failed to read public IPs from node: %u\n", j));
-                       talloc_free(public_ips);
-                       return NULL;
-               }
-               public_ips[j].num = ip_list->num;
-               if (ip_list->num == 0) {
-                       talloc_free(ip_list);
-                       continue;
-               }
-               public_ips[j].ip = talloc_zero_array(public_ips,
-                                                    struct ctdb_public_ip,
-                                                    ip_list->num);
-               if (public_ips[j].ip == NULL) {
-                       DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-                       talloc_free(public_ips);
-                       return NULL;
-               }
-               memcpy(public_ips[j].ip, &ip_list->ips[0],
-                      sizeof(struct ctdb_public_ip) * ip_list->num);
-               talloc_free(ip_list);
-       }
-
-       return public_ips;
-}
-
-static struct ctdb_node_map *
-ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
-                        const struct ctdb_node_map_old *old)
-{
-       struct ctdb_node_map *new;
-
-       new = talloc(mem_ctx, struct ctdb_node_map);
-       if (new == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-       new->num = old->num;
-       new->node = talloc_zero_array(new,
-                                     struct ctdb_node_and_flags, new->num);
-       memcpy(new->node, &old->nodes[0],
-              sizeof(struct ctdb_node_and_flags) * new->num);
-
-       return new;
-}
-
-
-static bool set_ipflags(struct ctdb_context *ctdb,
-                       struct ipalloc_state *ipalloc_state,
-                       struct ctdb_node_map_old *nodemap)
-{
-       struct ctdb_node_map *new;
-
-       new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
-       if (new == NULL) {
-               return false;
-       }
-
-       ipalloc_set_node_flags(ipalloc_state, new);
-
-       talloc_free(new);
-
-       return true;
-}
-
-static enum ipalloc_algorithm
-determine_algorithm(const struct ctdb_tunable_list *tunables)
-{
-       switch (tunables->ip_alloc_algorithm) {
-       case 0:
-               return IPALLOC_DETERMINISTIC;
-       case 1:
-               return IPALLOC_NONDETERMINISTIC;
-       case 2:
-               return IPALLOC_LCP2;
-       default:
-               return IPALLOC_LCP2;
-       };
-}
-
-struct takeover_callback_data {
-       uint32_t num_nodes;
-       unsigned int *fail_count;
-};
-
-static struct takeover_callback_data *
-takeover_callback_data_init(TALLOC_CTX *mem_ctx,
-                           uint32_t num_nodes)
-{
-       static struct takeover_callback_data *takeover_data;
-
-       takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
-       if (takeover_data == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return NULL;
-       }
-
-       takeover_data->fail_count = talloc_zero_array(takeover_data,
-                                                     unsigned int, num_nodes);
-       if (takeover_data->fail_count == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               talloc_free(takeover_data);
-               return NULL;
-       }
-
-       takeover_data->num_nodes = num_nodes;
-
-       return takeover_data;
-}
-
-static void takeover_run_fail_callback(struct ctdb_context *ctdb,
-                                      uint32_t node_pnn, int32_t res,
-                                      TDB_DATA outdata, void *callback_data)
-{
-       struct takeover_callback_data *cd =
-               talloc_get_type_abort(callback_data,
-                                     struct takeover_callback_data);
-
-       if (node_pnn >= cd->num_nodes) {
-               DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
-               return;
-       }
-
-       if (cd->fail_count[node_pnn] == 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Node %u failed the takeover run\n", node_pnn));
-       }
-
-       cd->fail_count[node_pnn]++;
-}
-
-static void takeover_run_process_failures(struct ctdb_context *ctdb,
-                                         struct takeover_callback_data *tcd)
-{
-       unsigned int max_fails = 0;
-       uint32_t max_pnn = -1;
-       uint32_t i;
-
-       for (i = 0; i < tcd->num_nodes; i++) {
-               if (tcd->fail_count[i] > max_fails) {
-                       max_pnn = i;
-                       max_fails = tcd->fail_count[i];
-               }
-       }
-
-       if (max_fails > 0) {
-               int ret;
-               TDB_DATA data;
-
-               DEBUG(DEBUG_ERR,
-                     ("Sending banning credits to %u with fail count %u\n",
-                      max_pnn, max_fails));
-
-               data.dptr = (uint8_t *)&max_pnn;
-               data.dsize = sizeof(uint32_t);
-               ret = ctdb_client_send_message(ctdb,
-                                              CTDB_BROADCAST_CONNECTED,
-                                              CTDB_SRVID_BANNING,
-                                              data);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR,
-                             ("Failed to set banning credits for node %u\n",
-                              max_pnn));
-               }
-       }
-}
-
-/*
- * Recalculate the allocation of public IPs to nodes and have the
- * nodes host their allocated addresses.
- *
- * - Initialise IP allocation state.  Pass:
-     + algorithm to be used;
-     + whether IP rebalancing ("failback") should be done (this uses a
-       cluster-wide configuration variable and only the value form the
-       master node is used); and
- *   + list of nodes to force rebalance (internal structure, currently
- *     no way to fetch, only used by LCP2 for nodes that have had new
- *     IP addresses added).
- * - Set IP flags for IP allocation based on node map and tunables
- *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
- *   (tunable fetching done separately so values can be faked in unit
- *   testing)
- * - Retrieve known and available IP addresses (done separately so
- *   values can be faked in unit testing)
- * - Use ipalloc_set_public_ips() to set known and available IP
-     addresses for allocation
- * - If cluster can't host IP addresses then early exit
- * - Run IP allocation algorithm
- * - Send RELEASE_IP to all nodes for IPs they should not host
- * - Send TAKE_IP to all nodes for IPs they should host
- * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
- */
-int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
-                     uint32_t *force_rebalance_nodes)
-{
-       int i, ret;
-       struct ctdb_public_ip ip;
-       uint32_t *nodes;
-       struct public_ip_list *all_ips, *tmp_ip;
-       TDB_DATA data;
-       struct timeval timeout;
-       struct client_async_data *async_data;
-       struct ctdb_client_control_state *state;
-       TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-       struct ipalloc_state *ipalloc_state;
-       struct ctdb_public_ip_list *known_ips, *available_ips;
-       struct takeover_callback_data *takeover_data;
-
-       /* Initialise fail callback data to be used with
-        * takeover_run_fail_callback().  A failure in any of the
-        * following steps will cause an early return, so this can be
-        * reused for each of those steps without re-initialising. */
-       takeover_data = takeover_callback_data_init(tmp_ctx,
-                                                   nodemap->num);
-       if (takeover_data == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Default timeout for early jump to IPREALLOCATED.  See below
-        * for explanation of 3 times... */
-       timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
-       /*
-        * ip failover is completely disabled, just send out the 
-        * ipreallocated event.
-        */
-       if (ctdb->tunable.disable_ip_failover != 0) {
-               goto ipreallocated;
-       }
-
-       ipalloc_state = ipalloc_state_init(
-               tmp_ctx, ctdb->num_nodes,
-               determine_algorithm(&ctdb->tunable),
-               (ctdb->tunable.no_ip_takeover != 0),
-               (ctdb->tunable.no_ip_failback != 0),
-               (ctdb->tunable.no_ip_host_on_all_disabled != 0),
-               force_rebalance_nodes);
-       if (ipalloc_state == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
-               DEBUG(DEBUG_ERR,
-                     ("Failed to set IP flags - aborting takeover run\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Fetch known/available public IPs from each active node */
-       /* Fetch lists of known public IPs from all nodes */
-       known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
-                                                nodemap, 0);
-       if (known_ips == NULL) {
-               DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-       available_ips = ctdb_fetch_remote_public_ips(
-               ctdb, ipalloc_state, nodemap,
-               CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
-       if (available_ips == NULL) {
-               DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips);
-
-       if (! ipalloc_can_host_ips(ipalloc_state)) {
-               DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
-               goto ipreallocated;
-       }
-
-       /* Do the IP reassignment calculations */
-       all_ips = ipalloc(ipalloc_state);
-       if (all_ips == NULL) {
-               talloc_free(tmp_ctx);
-               return -1;
-       }
-
-       /* Now tell all nodes to release any public IPs should not
-        * host.  This will be a NOOP on nodes that don't currently
-        * hold the given IP.
-        */
-       async_data = talloc_zero(tmp_ctx, struct client_async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       async_data->fail_callback = takeover_run_fail_callback;
-       async_data->callback_data = takeover_data;
-
-       ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
-
-       /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
-        * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
-        * seconds.  However, RELEASE_IP can take longer due to TCP
-        * connection killing, so sometimes needs more time.
-        * Therefore, use a cumulative timeout of TakeoverTimeout * 3
-        * seconds across all 3 stages.  No explicit expiry checks are
-        * needed before each stage because tevent is smart enough to
-        * fire the timeouts even if they are in the past.  Initialise
-        * this here so it explicitly covers the stages we're
-        * interested in but, in particular, not the time taken by the
-        * ipalloc().
-        */
-       timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
-
-       /* Send a RELEASE_IP to all nodes that should not be hosting
-        * each IP.  For each IP, all but one of these will be
-        * redundant.  However, the redundant ones are used to tell
-        * nodes which node should be hosting the IP so that commands
-        * like "ctdb ip" can display a particular nodes idea of who
-        * is hosting what. */
-       for (i=0;i<nodemap->num;i++) {
-               /* don't talk to unconnected nodes, but do talk to banned nodes */
-               if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
-                       continue;
-               }
-
-               for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
-                       if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
-                               /* This node should be serving this
-                                  vnn so don't tell it to release the ip
-                               */
-                               continue;
-                       }
-                       ip.pnn  = tmp_ip->pnn;
-                       ip.addr = tmp_ip->addr;
-
-                       data.dsize = sizeof(ip);
-                       data.dptr  = (uint8_t *)&ip;
-                       state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
-                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
-                                                 data, async_data,
-                                                 &timeout, NULL);
-                       if (state == NULL) {
-                               DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
-                               talloc_free(tmp_ctx);
-                               return -1;
-                       }
-
-                       ctdb_client_async_add(async_data, state);
-               }
-       }
-       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
-               goto fail;
-       }
-       talloc_free(async_data);
-
-
-       /* For each IP, send a TAKOVER_IP to the node that should be
-        * hosting it.  Many of these will often be redundant (since
-        * the allocation won't have changed) but they can be useful
-        * to recover from inconsistencies. */
-       async_data = talloc_zero(tmp_ctx, struct client_async_data);
-       CTDB_NO_MEMORY_FATAL(ctdb, async_data);
-
-       async_data->fail_callback = takeover_run_fail_callback;
-       async_data->callback_data = takeover_data;
-
-       for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
-               if (tmp_ip->pnn == -1) {
-                       /* this IP won't be taken over */
-                       continue;
-               }
-
-               ip.pnn  = tmp_ip->pnn;
-               ip.addr = tmp_ip->addr;
-
-               data.dsize = sizeof(ip);
-               data.dptr  = (uint8_t *)&ip;
-               state = ctdb_control_send(ctdb, tmp_ip->pnn,
-                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
-                                         data, async_data, &timeout, NULL);
-               if (state == NULL) {
-                       DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
-                       talloc_free(tmp_ctx);
-                       return -1;
-               }
-
-               ctdb_client_async_add(async_data, state);
-       }
-       if (ctdb_client_async_wait(ctdb, async_data) != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
-               goto fail;
-       }
-
-ipreallocated:
-       /*
-        * Tell all nodes to run eventscripts to process the
-        * "ipreallocated" event.  This can do a lot of things,
-        * including restarting services to reconfigure them if public
-        * IPs have moved.  Once upon a time this event only used to
-        * update natgw.
-        */
-       nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
-       ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
-                                       nodes, 0, timeout,
-                                       false, tdb_null,
-                                       NULL, takeover_run_fail_callback,
-                                       takeover_data);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR,
-                     ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
-               goto fail;
-       }
-
-       talloc_free(tmp_ctx);
-       return ret;
-
-fail:
-       takeover_run_process_failures(ctdb, takeover_data);
-       talloc_free(tmp_ctx);
-       return -1;
-}
-
-
 /*
   destroy a ctdb_client_ip structure
  */