Make create_merged_ip_list() a static function since
[metze/ctdb/wip.git] / server / ctdb_takeover.c
index 14dcec06b9bd4fbcac6fbd412aa79e2d30ab7549..eb40357178092bc7a776ce9bf0c3cf7f95187c25 100644 (file)
@@ -128,6 +128,9 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
        struct ctdb_tcp_array *tcparray;
 
        if (status != 0) {
+               if (status == -ETIME) {
+                       ctdb_ban_self(ctdb);
+               }
                DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
                        ctdb_addr_to_str(state->addr),
                        state->vnn->iface));
@@ -235,9 +238,10 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                vnn->iface));
 
        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_set(ctdb->tunable.script_timeout, 0),
                                         state, takeover_ip_callback, state,
-                                        "takeip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_TAKE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
                                         vnn->public_netmask_bits);
@@ -322,6 +326,10 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
                talloc_get_type(private_data, struct takeover_callback_state);
        TDB_DATA data;
 
+       if (status == -ETIME) {
+               ctdb_ban_self(ctdb);
+       }
+
        /* send a message to all clients of this node telling them
           that the cluster has been reconfigured and they should
           release any sockets on this IP */
@@ -368,7 +376,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        vnn->takeover_ctx = NULL;
 
        if (!ctdb_sys_have_ip(&pip->addr)) {
-               DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
+               DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
                        ctdb_addr_to_str(&pip->addr),
                        vnn->public_netmask_bits, 
                        vnn->iface));
@@ -391,9 +399,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        state->vnn   = vnn;
 
        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_set(ctdb->tunable.script_timeout, 0),
                                         state, release_ip_callback, state,
-                                        "releaseip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_RELEASE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
                                         vnn->public_netmask_bits);
@@ -676,15 +685,18 @@ void getips_count_callback(void *param, void *data)
        *ip_list     = new_ip;
 }
 
-struct ctdb_public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
+static struct ctdb_public_ip_list *
+create_merged_ip_list(struct ctdb_context *ctdb)
 {
        int i, j;
        struct ctdb_public_ip_list *ip_list;
        struct ctdb_all_public_ips *public_ips;
-       trbt_tree_t *ip_tree;
 
-       ip_tree = trbt_create(tmp_ctx, 0);
+       if (ctdb->ip_tree != NULL) {
+               talloc_free(ctdb->ip_tree);
+               ctdb->ip_tree = NULL;
+       }
+       ctdb->ip_tree = trbt_create(ctdb, 0);
 
        for (i=0;i<ctdb->num_nodes;i++) {
                public_ips = ctdb->nodes[i]->public_ips;
@@ -701,13 +713,13 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
                for (j=0;j<public_ips->num;j++) {
                        struct ctdb_public_ip_list *tmp_ip; 
 
-                       tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
+                       tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
                        CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
                        tmp_ip->pnn  = public_ips->ips[j].pnn;
                        tmp_ip->addr = public_ips->ips[j].addr;
                        tmp_ip->next = NULL;
 
-                       trbt_insertarray32_callback(ip_tree,
+                       trbt_insertarray32_callback(ctdb->ip_tree,
                                IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
                                add_ip_callback,
                                tmp_ip);
@@ -715,7 +727,7 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
        }
 
        ip_list = NULL;
-       trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
+       trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
 
        return ip_list;
 }
@@ -765,8 +777,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
           a full list of all public addresses that exist in the cluster.
           Walk over all node structures and create a merged list of
           all public addresses that exist in the cluster.
+
+          keep the tree of ips around as ctdb->ip_tree
        */
-       all_ips = create_merged_ip_list(ctdb, tmp_ctx);
+       all_ips = create_merged_ip_list(ctdb);
 
        /* If we want deterministic ip allocations, i.e. that the ip addresses
           will always be allocated the same way for a specific set of
@@ -1382,7 +1396,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
                if (vnn->pnn == ctdb->pnn) {
                        vnn->pnn = -1;
                }
-               ctdb_event_script(ctdb, "releaseip %s %s %u",
+               ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
                                  vnn->iface, 
                                  talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
                                  vnn->public_netmask_bits);
@@ -2122,9 +2136,10 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
                        DLIST_REMOVE(ctdb->vnn, vnn);
 
                        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_set(ctdb->tunable.script_timeout, 0),
                                         mem_ctx, delete_ip_callback, mem_ctx,
-                                        "releaseip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_RELEASE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
                                         vnn->public_netmask_bits);
@@ -2139,3 +2154,41 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
        return -1;
 }
 
+/* Called from the recovery daemon to verify that the ip allocation reported
+   by a remote node (ips) agrees with the expected one in ctdb->ip_tree.
+   Returns 0 if consistent or nothing to check, -1 on unknown ip or mismatch.
+*/
+int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+{
+       struct ctdb_public_ip_list *tmp_ip; 
+       int i;
+
+       if (ctdb->ip_tree == NULL) {
+               /* don't know the expected allocation yet, assume remote node
+                  is correct. */
+               return 0;
+       }
+
+       if (ips == NULL) {
+               return 0;
+       }
+
+       for (i=0; i<ips->num; i++) {
+               tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
+               if (tmp_ip == NULL) {
+                       DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+                       return -1;
+               }
+
+               if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
+                       continue; /* pnn of -1 on either side (presumably "unassigned" - confirm): skip the comparison */
+               }
+
+               if (tmp_ip->pnn != ips->ips[i].pnn) {
+                       DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
+                       return -1;
+               }
+       }
+
+       return 0;
+}