Make create_merged_ip_list() a static function since
[metze/ctdb/wip.git] / server / ctdb_takeover.c
index c14afb305624b1b55caec9d6b56d6dfcdaf0aef0..eb40357178092bc7a776ce9bf0c3cf7f95187c25 100644 (file)
@@ -106,7 +106,7 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
        }
 
        event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
-                       timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
+                       timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
                        ctdb_control_send_arp, arp);
 }
 
@@ -128,6 +128,9 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
        struct ctdb_tcp_array *tcparray;
 
        if (status != 0) {
+               if (status == -ETIME) {
+                       ctdb_ban_self(ctdb);
+               }
                DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
                        ctdb_addr_to_str(state->addr),
                        state->vnn->iface));
@@ -137,7 +140,7 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
        }
 
        if (!state->vnn->takeover_ctx) {
-               state->vnn->takeover_ctx = talloc_new(ctdb);
+               state->vnn->takeover_ctx = talloc_new(state->vnn);
                if (!state->vnn->takeover_ctx) {
                        goto failed;
                }
@@ -208,7 +211,7 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
        /* update out vnn list */
        vnn = find_public_ip_vnn(ctdb, &pip->addr);
        if (vnn == NULL) {
-               DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", 
+               DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
                        ctdb_addr_to_str(&pip->addr)));
                return 0;
        }
@@ -219,7 +222,7 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                return 0;
        }
 
-       state = talloc(ctdb, struct takeover_callback_state);
+       state = talloc(vnn, struct takeover_callback_state);
        CTDB_NO_MEMORY(ctdb, state);
 
        state->c = talloc_steal(ctdb, c);
@@ -235,9 +238,10 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
                vnn->iface));
 
        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
                                         state, takeover_ip_callback, state,
-                                        "takeip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_TAKE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
                                         vnn->public_netmask_bits);
@@ -322,10 +326,15 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
                talloc_get_type(private_data, struct takeover_callback_state);
        TDB_DATA data;
 
+       if (status == -ETIME) {
+               ctdb_ban_self(ctdb);
+       }
+
        /* send a message to all clients of this node telling them
           that the cluster has been reconfigured and they should
           release any sockets on this IP */
        data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
+       CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
        data.dsize = strlen((char *)data.dptr)+1;
 
        DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
@@ -356,7 +365,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        /* update our vnn list */
        vnn = find_public_ip_vnn(ctdb, &pip->addr);
        if (vnn == NULL) {
-               DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
+               DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
                        ctdb_addr_to_str(&pip->addr)));
                return 0;
        }
@@ -367,17 +376,18 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        vnn->takeover_ctx = NULL;
 
        if (!ctdb_sys_have_ip(&pip->addr)) {
-               DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
+               DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
                        ctdb_addr_to_str(&pip->addr),
                        vnn->public_netmask_bits, 
                        vnn->iface));
                return 0;
        }
 
-       DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", 
+       DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
                ctdb_addr_to_str(&pip->addr),
                vnn->public_netmask_bits, 
-               vnn->iface));
+               vnn->iface,
+               pip->pnn));
 
        state = talloc(ctdb, struct takeover_callback_state);
        CTDB_NO_MEMORY(ctdb, state);
@@ -389,9 +399,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
        state->vnn   = vnn;
 
        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
                                         state, release_ip_callback, state,
-                                        "releaseip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_RELEASE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
                                         vnn->public_netmask_bits);
@@ -444,6 +455,7 @@ static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *ad
        vnn = talloc_zero(ctdb, struct ctdb_vnn);
        CTDB_NO_MEMORY_FATAL(ctdb, vnn);
        vnn->iface = talloc_strdup(vnn, iface);
+       CTDB_NO_MEMORY(ctdb, vnn->iface);
        vnn->public_address      = *addr;
        vnn->public_netmask_bits = mask;
        vnn->pnn                 = -1;
@@ -485,6 +497,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
        for (i=0;i<nlines;i++) {
                unsigned mask;
                ctdb_sock_addr addr;
+               const char *addrstr;
                const char *iface;
                char *tok, *line;
 
@@ -499,11 +512,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
                        continue;
                }
                tok = strtok(line, " \t");
-               if (!tok || !parse_ip_mask(tok, &addr, &mask)) {
-                       DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
-                       talloc_free(lines);
-                       return -1;
-               }
+               addrstr = tok;
                tok = strtok(NULL, " \t");
                if (tok == NULL) {
                        if (NULL == ctdb->default_public_interface) {
@@ -517,6 +526,11 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
                        iface = tok;
                }
 
+               if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
+                       DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
+                       talloc_free(lines);
+                       return -1;
+               }
                if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
                        DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
                        talloc_free(lines);
@@ -632,54 +646,89 @@ static int find_takeover_node(struct ctdb_context *ctdb,
        return 0;
 }
 
-struct ctdb_public_ip_list *
-add_ip_to_merged_list(struct ctdb_context *ctdb,
-                       TALLOC_CTX *tmp_ctx, 
-                       struct ctdb_public_ip_list *ip_list, 
-                       struct ctdb_public_ip *ip)
+#define IP_KEYLEN      4 /* number of 32-bit words in one ip_tree key */
+static uint32_t *ip_key(ctdb_sock_addr *ip) /* build the IP_KEYLEN-word tree key for an address */
 {
-       struct ctdb_public_ip_list *tmp_ip; 
+       static uint32_t key[IP_KEYLEN]; /* static buffer: the returned pointer is overwritten by the next call */
 
-       /* do we already have this ip in our merged list ?*/
-       for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
+       bzero(key, sizeof(key)); /* words not set below stay zero */
 
-               /* we already have this public ip in the list */
-               if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
-                       return ip_list;
-               }
+       switch (ip->sa.sa_family) {
+       case AF_INET: /* IPv4: only the last word is used, key[0..2] remain zero */
+               key[3]  = htonl(ip->ip.sin_addr.s_addr); /* NOTE(review): s_addr is presumably already network order; the same transform is used for insert and lookup */
+               break;
+       case AF_INET6: /* IPv6: all four words are filled */
+               key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
+               key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
+               key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
+               key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
+               break;
+       default:
+               DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
+               return key; /* unknown family: the all-zero key is returned */
        }
 
-       /* this is a new public ip, we must add it to the list */
-       tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
-       CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
-       tmp_ip->pnn  = ip->pnn;
-       tmp_ip->addr = ip->addr;
-       tmp_ip->next = ip_list;
+       return key;
+}
 
-       return tmp_ip;
+static void *add_ip_callback(void *parm, void *data) /* insert callback handed to trbt_insertarray32_callback */
+{
+       return parm; /* data is ignored; presumably the returned pointer is what the tree stores for the key - confirm against trbt */
 }
 
-struct ctdb_public_ip_list *
-create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
+void getips_count_callback(void *param, void *data) /* traversal callback: prepend one tree entry to a linked list */
+{
+       struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param; /* address of the list head */
+       struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data; /* the entry being visited */
+
+       new_ip->next = *ip_list; /* link in front of the current head ... */
+       *ip_list     = new_ip; /* ... and make it the new head */
+}
+
+static struct ctdb_public_ip_list * /* returns the head of the merged list (entries are owned by ctdb->ip_tree) */
+create_merged_ip_list(struct ctdb_context *ctdb) /* rebuild ctdb->ip_tree from every node's public_ips */
 {
        int i, j;
-       struct ctdb_public_ip_list *ip_list = NULL;
+       struct ctdb_public_ip_list *ip_list;
        struct ctdb_all_public_ips *public_ips;
 
+       if (ctdb->ip_tree != NULL) {
+               talloc_free(ctdb->ip_tree); /* drop the tree (and its talloc children) from the previous call */
+               ctdb->ip_tree = NULL;
+       }
+       ctdb->ip_tree = trbt_create(ctdb, 0); /* NOTE(review): the result is not checked here */
+
        for (i=0;i<ctdb->num_nodes;i++) {
                public_ips = ctdb->nodes[i]->public_ips;
 
+               if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) { /* deleted nodes contribute no addresses */
+                       continue;
+               }
+
                /* there were no public ips for this node */
                if (public_ips == NULL) {
                        continue;
                }               
 
                for (j=0;j<public_ips->num;j++) {
-                       ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
-                                       ip_list, &public_ips->ips[j]);
+                       struct ctdb_public_ip_list *tmp_ip; 
+
+                       tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list); /* allocated with the tree as talloc parent */
+                       CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
+                       tmp_ip->pnn  = public_ips->ips[j].pnn;
+                       tmp_ip->addr = public_ips->ips[j].addr;
+                       tmp_ip->next = NULL; /* list links are filled in by the traversal below */
+
+                       trbt_insertarray32_callback(ctdb->ip_tree,
+                               IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
+                               add_ip_callback, /* NOTE(review): returns the new entry, so a repeated address presumably replaces the earlier one - confirm in trbt */
+                               tmp_ip);
                }
        }
 
+       ip_list = NULL;
+       trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list); /* flatten the tree into a list, one prepend per entry */
+
        return ip_list;
 }
 
@@ -728,8 +777,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
           a full list of all public addresses that exist in the cluster.
           Walk over all node structures and create a merged list of
           all public addresses that exist in the cluster.
+
+          keep the tree of ips around as ctdb->ip_tree
        */
-       all_ips = create_merged_ip_list(ctdb, tmp_ctx);
+       all_ips = create_merged_ip_list(ctdb);
 
        /* If we want deterministic ip allocations, i.e. that the ip addresses
           will always be allocated the same way for a specific set of
@@ -1022,15 +1073,16 @@ static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
 /*
   called by a client to inform us of a TCP connection that it is managing
   that should tickled with an ACK when IP takeover is done
+  we handle both the old ipv4 style of packets as well as the new ipv4/6
+  pdus.
  */
-//qqq we need a new version of this control that takes ctdb_sock_addr
-//and have samba move to that instead.
-// This is IPV4 ONLY
 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
                                TDB_DATA indata)
 {
        struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
-       struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
+       struct ctdb_control_tcp *old_addr = NULL;
+       struct ctdb_control_tcp_addr new_addr;
+       struct ctdb_control_tcp_addr *tcp_sock = NULL;
        struct ctdb_tcp_list *tcp;
        struct ctdb_control_tcp_vnn t;
        int ret;
@@ -1039,20 +1091,57 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
        struct ctdb_vnn *vnn;
        ctdb_sock_addr addr;
 
+       switch (indata.dsize) {
+       case sizeof(struct ctdb_control_tcp):
+               old_addr = (struct ctdb_control_tcp *)indata.dptr;
+               ZERO_STRUCT(new_addr);
+               tcp_sock = &new_addr;
+               tcp_sock->src.ip  = old_addr->src;
+               tcp_sock->dest.ip = old_addr->dest;
+               break;
+       case sizeof(struct ctdb_control_tcp_addr):
+               tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
+               break;
+       default:
+               DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
+                                "to ctdb_control_tcp_client. size was %d but "
+                                "only allowed sizes are %lu and %lu\n",
+                                (int)indata.dsize,
+                                (long unsigned)sizeof(struct ctdb_control_tcp),
+                                (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
+               return -1;
+       }
+
+       addr = tcp_sock->src;
+       ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
+       addr = tcp_sock->dest;
+       ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
+
        ZERO_STRUCT(addr);
-       addr.ip = p->dest;
+       memcpy(&addr, &tcp_sock->dest, sizeof(addr));
        vnn = find_public_ip_vnn(ctdb, &addr);
        if (vnn == NULL) {
-               if (ntohl(p->dest.sin_addr.s_addr) != INADDR_LOOPBACK) {
-                       DEBUG(DEBUG_INFO,("Could not add client IP %s. This is not a public address.\n", 
-                               ctdb_addr_to_str((ctdb_sock_addr *)&p->dest)));
+               switch (addr.sa.sa_family) {
+               case AF_INET:
+                       if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
+                               DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
+                                       ctdb_addr_to_str(&addr)));
+                       }
+                       break;
+               case AF_INET6:
+                       DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
+                               ctdb_addr_to_str(&addr)));
+                       break;
+               default:
+                       DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
                }
+
                return 0;
        }
 
        if (vnn->pnn != ctdb->pnn) {
                DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
-                       ctdb_addr_to_str((ctdb_sock_addr *)&p->dest),
+                       ctdb_addr_to_str(&addr),
                        client_id, client->pid));
                /* failing this call will tell smbd to die */
                return -1;
@@ -1062,7 +1151,7 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
        CTDB_NO_MEMORY(ctdb, ip);
 
        ip->ctdb      = ctdb;
-       ip->addr.ip   = p->dest;
+       ip->addr      = addr;
        ip->client_id = client_id;
        talloc_set_destructor(ip, ctdb_client_ip_destructor);
        DLIST_ADD(ctdb->client_ip_list, ip);
@@ -1070,21 +1159,34 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
        tcp = talloc(client, struct ctdb_tcp_list);
        CTDB_NO_MEMORY(ctdb, tcp);
 
-       tcp->connection.src_addr.ip = p->src;
-       tcp->connection.dst_addr.ip = p->dest;
+       tcp->connection.src_addr = tcp_sock->src;
+       tcp->connection.dst_addr = tcp_sock->dest;
 
        DLIST_ADD(client->tcp_list, tcp);
 
-       t.src.ip  = p->src;
-       t.dest.ip = p->dest;
+       t.src  = tcp_sock->src;
+       t.dest = tcp_sock->dest;
 
        data.dptr = (uint8_t *)&t;
        data.dsize = sizeof(t);
 
-       DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
-               (unsigned)ntohs(p->dest.sin_port), 
-               ctdb_addr_to_str((ctdb_sock_addr *)&p->src),
-               (unsigned)ntohs(p->src.sin_port), client_id, client->pid));
+       switch (addr.sa.sa_family) {
+       case AF_INET:
+               DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
+                       (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
+                       ctdb_addr_to_str(&tcp_sock->src),
+                       (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
+               break;
+       case AF_INET6:
+               DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
+                       (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
+                       ctdb_addr_to_str(&tcp_sock->src),
+                       (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
+               break;
+       default:
+               DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
+       }
+
 
        /* tell all nodes about this tcp connection */
        ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
@@ -1294,7 +1396,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
                if (vnn->pnn == ctdb->pnn) {
                        vnn->pnn = -1;
                }
-               ctdb_event_script(ctdb, "releaseip %s %s %u",
+               ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
                                  vnn->iface, 
                                  talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
                                  vnn->public_netmask_bits);
@@ -1967,7 +2069,7 @@ int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indat
 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
 {
        struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
-
+       int ret;
 
        /* verify the size of indata */
        if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
@@ -1985,7 +2087,14 @@ int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA inda
                return -1;
        }
 
-       return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
+       ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
+
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
+               return -1;
+       }
+
+       return 0;
 }
 
 /*
@@ -2027,9 +2136,10 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
                        DLIST_REMOVE(ctdb->vnn, vnn);
 
                        ret = ctdb_event_script_callback(ctdb, 
-                                        timeval_current_ofs(ctdb->tunable.script_timeout, 0),
                                         mem_ctx, delete_ip_callback, mem_ctx,
-                                        "releaseip %s %s %u",
+                                        false,
+                                        CTDB_EVENT_RELEASE_IP,
+                                        "%s %s %u",
                                         vnn->iface, 
                                         talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
                                         vnn->public_netmask_bits);
@@ -2044,3 +2154,41 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
        return -1;
 }
 
+/* Called from the recovery daemon to verify that the public ip list reported
+   by a remote node matches the expected allocation kept in ctdb->ip_tree.
+   Returns 0 when nothing disagrees (or nothing can be compared), -1 otherwise.
+*/
+int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
+{
+       struct ctdb_public_ip_list *tmp_ip; 
+       int i;
+
+       if (ctdb->ip_tree == NULL) {
+               /* don't know the expected allocation yet, assume remote node
+                  is correct. */
+               return 0;
+       }
+
+       if (ips == NULL) { /* no list to check */
+               return 0;
+       }
+
+       for (i=0; i<ips->num; i++) {
+               tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
+               if (tmp_ip == NULL) { /* reported address has no entry in the expected allocation */
+                       DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
+                       return -1;
+               }
+
+               if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) { /* -1 appears to mean "not held by any node" - not compared */
+                       continue;
+               }
+
+               if (tmp_ip->pnn != ips->ips[i].pnn) { /* remote node's owner differs from the expected owner */
+                       DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
+                       return -1;
+               }
+       }
+
+       return 0;
+}