4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout passed to the release and takeover controls; reads the
   takeover_timeout tunable from a variable named ctdb in the calling scope. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument of the delay used when ctdb_control_send_arp re-arms its timer. */
33 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp compares arp->count against this value. */
34 #define CTDB_ARP_REPEAT 3
/* State handed to ctdb_control_send_arp as its private data.
   NOTE(review): only part of this definition is visible in this view; the
   code in this file also reads addr, vnn and count members. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked list node (prev, next) wrapping one tcp connection.
   ctdb_control_tcp_client links these onto client->tcp_list. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Doubly linked list node kept on ctdb->client_ip_list.
   NOTE(review): only part of this definition is visible in this view; the
   code in this file also reads addr and client_id members. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/*
  Timed-event handler that announces an address after takeover.
  private_data must be a struct ctdb_takeover_arp (checked with
  talloc_get_type). The handler calls ctdb_sys_send_arp for arp->addr on
  arp->vnn->iface, then walks arp->tcparray and calls ctdb_sys_send_tcp for
  each recorded connection (a tcp tickle ack, per the log text). Failures
  are reported at DEBUG_CRIT. arp->count is compared with CTDB_ARP_REPEAT,
  and the handler is re-added on arp->vnn->takeover_ctx with
  timeval_current_ofs(CTDB_ARP_INTERVAL, 100000).
  NOTE(review): the statements that update arp->count and that end the
  repeat cycle are not visible in this view.
*/
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
/* Per-request state passed as private data to takeover_ip_callback and
   release_ip_callback: the control request to reply to (c), the address
   being moved (addr) and the vnn it belongs to. */
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/*
  Completion handler for the takeip event script.
  private_data must be a struct takeover_callback_state. status is tested
  against -ETIME; on the failure path the error is logged and the control
  is answered with status. Otherwise a takeover_ctx is created on the vnn
  if it has none, a zeroed ctdb_takeover_arp is allocated in that context,
  the vnn tcp_array is moved into it with talloc_steal (the vnn pointer is
  cleared and tcp_update_needed is set), ctdb_control_send_arp is scheduled
  with timeval_zero() and the control is answered with 0. The failed path
  answers the control with -1.
  NOTE(review): arp is allocated with talloc_zero and arp->ctdb->ev is read
  when scheduling the event; the assignment of arp->ctdb is not visible in
  this view - confirm it is set before that call.
*/
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 if (status == -ETIME) {
134 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135 ctdb_addr_to_str(state->addr),
137 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
142 if (!state->vnn->takeover_ctx) {
143 state->vnn->takeover_ctx = talloc_new(state->vnn);
144 if (!state->vnn->takeover_ctx) {
149 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150 if (!arp) goto failed;
153 arp->addr = *state->addr;
154 arp->vnn = state->vnn;
156 tcparray = state->vnn->tcp_array;
158 /* add all of the known tcp connections for this IP to the
159 list of tcp connections to send tickle acks for */
160 arp->tcparray = talloc_steal(arp, tcparray);
162 state->vnn->tcp_array = NULL;
163 state->vnn->tcp_update_needed = true;
166 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
167 timeval_zero(), ctdb_control_send_arp, arp);
169 /* the control succeeded */
170 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
175 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
181 Find the vnn of the node that has a public ip address
182 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each public_address with addr
   using ctdb_same_ip. The return type is a struct ctdb_vnn pointer.
   NOTE(review): the return statements are not visible in this view. */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
186 struct ctdb_vnn *vnn;
188 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189 if (ctdb_same_ip(&vnn->public_address, addr)) {
199 take over an ip address
/*
  Handle a takeover-ip control.
  indata.dptr is read as a struct ctdb_public_ip. The address is looked up
  with find_public_ip_vnn and tested with ctdb_sys_have_ip; a
  takeover_callback_state is then allocated on the vnn and passed to
  ctdb_event_script_callback with takeover_ip_callback as the completion
  handler. A failure to start the script is logged at DEBUG_ERR.
  NOTE(review): state->c is stolen onto ctdb and state->addr is allocated
  on ctdb here, whereas ctdb_control_release_ip parents both on state;
  confirm that both are released when state goes away.
*/
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
202 struct ctdb_req_control *c,
207 struct takeover_callback_state *state;
208 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209 struct ctdb_vnn *vnn;
211 /* update our vnn list */
212 vnn = find_public_ip_vnn(ctdb, &pip->addr);
214 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
215 ctdb_addr_to_str(&pip->addr)));
220 /* if our kernel already has this IP, do nothing */
221 if (ctdb_sys_have_ip(&pip->addr)) {
225 state = talloc(vnn, struct takeover_callback_state);
226 CTDB_NO_MEMORY(ctdb, state);
228 state->c = talloc_steal(ctdb, c);
229 state->addr = talloc(ctdb, ctdb_sock_addr);
230 CTDB_NO_MEMORY(ctdb, state->addr);
232 *state->addr = pip->addr;
235 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
236 ctdb_addr_to_str(&pip->addr),
237 vnn->public_netmask_bits,
240 ret = ctdb_event_script_callback(ctdb,
241 state, takeover_ip_callback, state,
246 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247 vnn->public_netmask_bits);
250 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251 ctdb_addr_to_str(&pip->addr),
257 /* tell ctdb_control.c that we will be replying asynchronously */
264 takeover an ip address old v4 style
/*
  Old v4-style entry point for takeover.
  Allocates a zeroed struct ctdb_public_ip on c, copies the incoming
  payload into it and forwards to ctdb_control_takeover_ip.
  NOTE(review): memcpy copies indata.dsize bytes into a buffer of
  sizeof(struct ctdb_public_ip); no length check is visible in this block -
  confirm the caller bounds indata.dsize.
*/
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
267 struct ctdb_req_control *c,
273 data.dsize = sizeof(struct ctdb_public_ip);
274 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275 CTDB_NO_MEMORY(ctdb, data.dptr);
277 memcpy(data.dptr, indata.dptr, indata.dsize);
278 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
282 kill any clients that are registered with a IP that is being released
/*
  Walks ctdb->client_ip_list; for every entry that matches addr according
  to ctdb_same_ip, looks the client up with ctdb_reqid_find and, when its
  pid is non-zero, sends SIGKILL to that pid.
  NOTE(review): the result of ctdb_reqid_find is dereferenced (client->pid)
  with no NULL check visible in this view - confirm it cannot be NULL here.
*/
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
286 struct ctdb_client_ip *ip;
288 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289 ctdb_addr_to_str(addr)));
291 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292 ctdb_sock_addr tmp_addr;
295 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
297 ctdb_addr_to_str(&ip->addr)));
299 if (ctdb_same_ip(&tmp_addr, addr)) {
300 struct ctdb_client *client = ctdb_reqid_find(ctdb,
303 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
305 ctdb_addr_to_str(&ip->addr),
308 if (client->pid != 0) {
309 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310 (unsigned)client->pid,
311 ctdb_addr_to_str(addr),
313 kill(client->pid, SIGKILL);
320 called when releaseip event finishes
/*
  Completion handler for the releaseip event script.
  private_data must be a struct takeover_callback_state. status is tested
  against -ETIME. The address is rendered to a string allocated on state
  and sent, including its terminating NUL, as a CTDB_SRVID_RELEASE_IP
  message to ctdb->pnn; release_kill_clients is then called for the address
  and the control is answered with 0.
*/
322 static void release_ip_callback(struct ctdb_context *ctdb, int status,
325 struct takeover_callback_state *state =
326 talloc_get_type(private_data, struct takeover_callback_state);
329 if (status == -ETIME) {
333 /* send a message to all clients of this node telling them
334 that the cluster has been reconfigured and they should
335 release any sockets on this IP */
336 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
338 data.dsize = strlen((char *)data.dptr)+1;
340 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
342 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
344 /* kill clients that have registered with this IP */
345 release_kill_clients(ctdb, state->addr);
347 /* the control succeeded */
348 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
353 release an ip address
/*
  Handle a release-ip control.
  indata.dptr is read as a struct ctdb_public_ip. The address is looked up
  with find_public_ip_vnn, the vnn takeover_ctx is freed and cleared, and
  ctdb_sys_have_ip is consulted (a release of an address that is not held
  is logged as redundant). A takeover_callback_state is then allocated on
  ctdb, with the request and the address copy parented on it, and passed to
  ctdb_event_script_callback for CTDB_EVENT_RELEASE_IP with
  release_ip_callback as the completion handler.
*/
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
356 struct ctdb_req_control *c,
361 struct takeover_callback_state *state;
362 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363 struct ctdb_vnn *vnn;
365 /* update our vnn list */
366 vnn = find_public_ip_vnn(ctdb, &pip->addr);
368 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369 ctdb_addr_to_str(&pip->addr)));
374 /* stop any previous arps */
375 talloc_free(vnn->takeover_ctx);
376 vnn->takeover_ctx = NULL;
378 if (!ctdb_sys_have_ip(&pip->addr)) {
379 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
380 ctdb_addr_to_str(&pip->addr),
381 vnn->public_netmask_bits,
386 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
387 ctdb_addr_to_str(&pip->addr),
388 vnn->public_netmask_bits,
392 state = talloc(ctdb, struct takeover_callback_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(state, c);
396 state->addr = talloc(state, ctdb_sock_addr);
397 CTDB_NO_MEMORY(ctdb, state->addr);
398 *state->addr = pip->addr;
401 ret = ctdb_event_script_callback(ctdb,
402 state, release_ip_callback, state,
404 CTDB_EVENT_RELEASE_IP,
407 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411 ctdb_addr_to_str(&pip->addr),
417 /* tell the control that we will be replying asynchronously */
423 release an ip address old v4 style
/*
  Old v4-style entry point for release.
  Allocates a zeroed struct ctdb_public_ip on c, copies the incoming
  payload into it and forwards to ctdb_control_release_ip.
  NOTE(review): memcpy copies indata.dsize bytes into a buffer of
  sizeof(struct ctdb_public_ip); no length check is visible in this block -
  confirm the caller bounds indata.dsize.
*/
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
426 struct ctdb_req_control *c,
432 data.dsize = sizeof(struct ctdb_public_ip);
433 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434 CTDB_NO_MEMORY(ctdb, data.dptr);
436 memcpy(data.dptr, indata.dptr, indata.dsize);
437 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Register one public address.
  The existing ctdb->vnn list is scanned with ctdb_same_sockaddr and a
  duplicate is reported at DEBUG_CRIT. Otherwise a zeroed struct ctdb_vnn is
  allocated on ctdb, filled with a copy of iface, the address and the mask,
  and added to ctdb->vnn with DLIST_ADD.
  The caller in ctdb_set_public_addresses treats a non-zero return as failure.
*/
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
443 struct ctdb_vnn *vnn;
445 /* Verify that we dont have an entry for this ip yet */
446 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
449 ctdb_addr_to_str(addr)));
454 /* create a new vnn structure for this ip address */
455 vnn = talloc_zero(ctdb, struct ctdb_vnn);
456 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457 vnn->iface = talloc_strdup(vnn, iface);
458 CTDB_NO_MEMORY(ctdb, vnn->iface);
459 vnn->public_address = *addr;
460 vnn->public_netmask_bits = mask;
463 DLIST_ADD(ctdb->vnn, vnn);
470 setup the event script directory
/* Stores a talloc copy of script_dir (parented on ctdb) in
   ctdb->event_script_dir; the allocation is checked with CTDB_NO_MEMORY. */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
474 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
480 setup the public address lists from a file
/*
  Load the public address list from the file named by alist.
  The file is read with file_lines_load; trailing empty lines are trimmed.
  For each remaining line, leading spaces and tabs are skipped, empty lines
  are tested for, and the line is split with strtok on space and tab. When
  no interface token is present, ctdb->default_public_interface is used (its
  absence is reported at DEBUG_CRIT). The address token is parsed with
  parse_ip_mask and registered with ctdb_add_public_address; malformed lines
  and failed additions are reported at DEBUG_CRIT with their line number.
*/
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
488 lines = file_lines_load(alist, &nlines, ctdb);
490 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
493 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
497 for (i=0;i<nlines;i++) {
505 while ((*line == ' ') || (*line == '\t')) {
511 if (strcmp(line, "") == 0) {
514 tok = strtok(line, " \t");
516 tok = strtok(NULL, " \t");
518 if (NULL == ctdb->default_public_interface) {
519 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
524 iface = ctdb->default_public_interface;
529 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
534 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked list node for the merged list of public addresses.
   NOTE(review): only part of this definition is visible in this view; the
   code in this file also reads pnn and addr members. */
548 struct ctdb_public_ip_list {
549 struct ctdb_public_ip_list *next;
555 /* Given a physical node, return the number of
556 public addresses that is currently assigned to this node.
/* Walks the ips list and tests each entry pnn against the pnn argument.
   NOTE(review): the counter update and the return are not visible in this view. */
558 static int node_ip_coverage(struct ctdb_context *ctdb,
560 struct ctdb_public_ip_list *ips)
564 for (;ips;ips=ips->next) {
565 if (ips->pnn == pnn) {
573 /* Check if this is a public ip known to the node, i.e. can that
574 node takeover this ip ?
/* Looks up ctdb->nodes[pnn]->public_ips, handles the case where it is
   NULL, and otherwise searches it for ip->addr with ctdb_same_ip.
   Callers in this file treat a return of 0 as meaning the node can serve
   the address and any non-zero value as meaning it cannot. */
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
577 struct ctdb_public_ip_list *ip)
579 struct ctdb_all_public_ips *public_ips;
582 public_ips = ctdb->nodes[pnn]->public_ips;
584 if (public_ips == NULL) {
588 for (i=0;i<public_ips->num;i++) {
589 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590 /* yes, this node can serve this public ip */
599 /* search the node lists list for a node to takeover this ip.
600 pick the node that is currently serving the fewest ips
601 so that the ips get spread out evenly.
/* Scans the nodes in nodemap for one to take over ip. Nodes whose flags
   intersect mask are tested for first, then nodes for which
   can_node_serve_ip returns non-zero; node_ip_coverage is read for the
   remaining ones. A warning is logged when no node is found.
   The caller in ctdb_takeover_run treats a non-zero return as failure. */
603 static int find_takeover_node(struct ctdb_context *ctdb,
604 struct ctdb_node_map *nodemap, uint32_t mask,
605 struct ctdb_public_ip_list *ip,
606 struct ctdb_public_ip_list *all_ips)
612 for (i=0;i<nodemap->num;i++) {
613 if (nodemap->nodes[i].flags & mask) {
614 /* This node is not healty and can not be used to serve
620 /* verify that this node can serve this ip */
621 if (can_node_serve_ip(ctdb, i, ip)) {
622 /* no it couldnt so skip to the next node */
626 num = node_ip_coverage(ctdb, i, all_ips);
627 /* was this the first node we checked ? */
639 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640 ctdb_addr_to_str(&ip->addr)));
/* Builds the tree key for an address in a function-static array of
   IP_KEYLEN words, so each call overwrites the previous result. The array
   is zeroed with bzero; one branch stores htonl of the ipv4 address in
   key[3], another stores htonl of the four ipv6 words in key[0] to key[3],
   and an unknown family is reported at DEBUG_ERR.
   NOTE(review): the case labels and the return are not visible in this view. */
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
652 static uint32_t key[IP_KEYLEN];
654 bzero(key, sizeof(key));
656 switch (ip->sa.sa_family) {
658 key[3] = htonl(ip->ip.sin_addr.s_addr);
661 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
667 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* NOTE(review): only the signature is visible in this view; presumably the
   callback given to trbt_insertarray32_callback in create_merged_ip_list -
   confirm. */
674 static void *add_ip_callback(void *parm, void *data)
/* Tree traversal callback used by create_merged_ip_list. param is the
   address of a list head pointer and data is a ctdb_public_ip_list entry;
   the entry next pointer is set to the current head.
   NOTE(review): the update of the head itself is not visible in this view. */
679 void getips_count_callback(void *param, void *data)
681 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
684 new_ip->next = *ip_list;
/*
  Build the merged list of public addresses known to all nodes.
  Any existing ctdb->ip_tree is freed and a new tree is created with
  trbt_create. For every node the NODE_FLAGS_DELETED flag and a NULL
  public_ips pointer are tested for; for each address of the remaining
  nodes a zeroed ctdb_public_ip_list entry is allocated on the tree, filled
  with the pnn and addr, and inserted with trbt_insertarray32_callback under
  the key produced by ip_key. The tree is finally traversed with
  getips_count_callback, which collects the entries into ip_list.
  The entries are owned by ctdb->ip_tree, so they go away when the tree is
  freed on the next call.
  NOTE(review): the initialisation of ip_list before the traversal is not
  visible in this view - confirm it starts out as NULL.
*/
688 static struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb)
692 struct ctdb_public_ip_list *ip_list;
693 struct ctdb_all_public_ips *public_ips;
695 if (ctdb->ip_tree != NULL) {
696 talloc_free(ctdb->ip_tree);
697 ctdb->ip_tree = NULL;
699 ctdb->ip_tree = trbt_create(ctdb, 0);
701 for (i=0;i<ctdb->num_nodes;i++) {
702 public_ips = ctdb->nodes[i]->public_ips;
704 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
708 /* there were no public ips for this node */
709 if (public_ips == NULL) {
713 for (j=0;j<public_ips->num;j++) {
714 struct ctdb_public_ip_list *tmp_ip;
716 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
717 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
718 tmp_ip->pnn = public_ips->ips[j].pnn;
719 tmp_ip->addr = public_ips->ips[j].addr;
722 trbt_insertarray32_callback(ctdb->ip_tree,
723 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
730 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
736 make any IP alias changes for public addresses that are necessary
/*
  Recompute which node serves each public address and push the result out.
  Visible phases, in order:
   - choose the flag mask: NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED when at
     least one node has neither flag, otherwise NODE_FLAGS_INACTIVE only;
   - build the merged address list with create_merged_ip_list;
   - when the deterministic_public_ips tunable is 1, reassign addresses by
     list position modulo nodemap->num where can_node_serve_ip allows it;
   - examine addresses whose node matches the mask or cannot serve them;
   - call find_takeover_node for every address whose pnn is -1;
   - handle the no_ip_failback tunable (its combination with
     deterministic_public_ips is reported as an error);
   - rebalance: compare the highest and lowest coverage among eligible
     nodes and, while the spread exceeds one and retries is below
     num_ips + 5, look for an address on the most loaded node;
   - send RELEASE_IP (RELEASE_IPv4 for AF_INET) to every node that is not
     NODE_FLAGS_DISCONNECTED for each address it should not hold, and wait;
   - send TAKEOVER_IP (TAKEOVER_IPv4 for AF_INET) to the owner of every
     address whose pnn is not -1, and wait.
  All temporary allocations hang off tmp_ctx, which is freed on the visible
  error paths and at the end.
  NOTE(review): return statements and several loop bodies are not visible
  in this view, so the exact result codes are not documented here.
*/
738 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
740 int i, num_healthy, retries, num_ips;
741 struct ctdb_public_ip ip;
742 struct ctdb_public_ipv4 ipv4;
744 struct ctdb_public_ip_list *all_ips, *tmp_ip;
745 int maxnode, maxnum=0, minnode, minnum=0, num;
747 struct timeval timeout;
748 struct client_async_data *async_data;
749 struct ctdb_client_control_state *state;
750 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
755 /* Count how many completely healthy nodes we have */
757 for (i=0;i<nodemap->num;i++) {
758 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
763 if (num_healthy > 0) {
764 /* We have healthy nodes, so only consider them for
765 serving public addresses
767 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
769 /* We didnt have any completely healthy nodes so
770 use "disabled" nodes as a fallback
772 mask = NODE_FLAGS_INACTIVE;
775 /* since nodes only know about those public addresses that
776 can be served by that particular node, no single node has
777 a full list of all public addresses that exist in the cluster.
778 Walk over all node structures and create a merged list of
779 all public addresses that exist in the cluster.
781 keep the tree of ips around as ctdb->ip_tree
783 all_ips = create_merged_ip_list(ctdb);
785 /* Count how many ips we have */
787 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
791 /* If we want deterministic ip allocations, i.e. that the ip addresses
792 will always be allocated the same way for a specific set of
793 available/unavailable nodes.
795 if (1 == ctdb->tunable.deterministic_public_ips) {
796 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
797 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
800 tmp_pnn = i%nodemap->num;
801 if (can_node_serve_ip(ctdb, tmp_pnn, tmp_ip) == 0) {
802 tmp_ip->pnn = tmp_pnn;
808 /* mark all public addresses with a masked node as being served by
811 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
812 if (tmp_ip->pnn == -1) {
815 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
820 /* verify that the assigned nodes can serve that public ip
821 and set it to -1 if not
823 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
824 if (tmp_ip->pnn == -1) {
827 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
828 /* this node can not serve this ip. */
834 /* now we must redistribute all public addresses with takeover node
835 -1 among the nodes available
839 /* loop over all ip's and find a physical node to cover for
842 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
843 if (tmp_ip->pnn == -1) {
844 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
845 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
846 ctdb_addr_to_str(&tmp_ip->addr)));
851 /* If we dont want ips to fail back after a node becomes healthy
852 again, we wont even try to reallocat the ip addresses so that
853 they are evenly spread out.
854 This can NOT be used at the same time as DeterministicIPs !
856 if (1 == ctdb->tunable.no_ip_failback) {
857 if (1 == ctdb->tunable.deterministic_public_ips) {
858 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
864 /* now, try to make sure the ip adresses are evenly distributed
866 for each ip address, loop over all nodes that can serve this
867 ip and make sure that the difference between the node
868 serving the most and the node serving the least ip's are not greater
871 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
872 if (tmp_ip->pnn == -1) {
876 /* Get the highest and lowest number of ips's served by any
877 valid node which can serve this ip.
881 for (i=0;i<nodemap->num;i++) {
882 if (nodemap->nodes[i].flags & mask) {
886 /* only check nodes that can actually serve this ip */
887 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
888 /* no it couldnt so skip to the next node */
892 num = node_ip_coverage(ctdb, i, all_ips);
913 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
914 ctdb_addr_to_str(&tmp_ip->addr)));
919 /* If we want deterministic IPs then dont try to reallocate
920 them to spread out the load.
922 if (1 == ctdb->tunable.deterministic_public_ips) {
926 /* if the spread between the smallest and largest coverage by
927 a node is >=2 we steal one of the ips from the node with
928 most coverage to even things out a bit.
929 try to do this a limited number of times since we dont
930 want to spend too much time balancing the ip coverage.
932 if ( (maxnum > minnum+1)
933 && (retries < (num_ips + 5)) ){
934 struct ctdb_public_ip_list *tmp;
936 /* mark one of maxnode's vnn's as unassigned and try
939 for (tmp=all_ips;tmp;tmp=tmp->next) {
940 if (tmp->pnn == maxnode) {
950 /* finished distributing the public addresses, now just send the
951 info out to the nodes
955 /* at this point ->pnn is the node which will own each IP
956 or -1 if there is no node that can cover this ip
959 /* now tell all nodes to delete any alias that they should not
960 have. This will be a NOOP on nodes that don't currently
961 hold the given alias */
962 async_data = talloc_zero(tmp_ctx, struct client_async_data);
963 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
965 for (i=0;i<nodemap->num;i++) {
966 /* don't talk to unconnected nodes, but do talk to banned nodes */
967 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
971 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
972 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
973 /* This node should be serving this
974 vnn so dont tell it to release the ip
978 if (tmp_ip->addr.sa.sa_family == AF_INET) {
979 ipv4.pnn = tmp_ip->pnn;
980 ipv4.sin = tmp_ip->addr.ip;
982 timeout = TAKEOVER_TIMEOUT();
983 data.dsize = sizeof(ipv4);
984 data.dptr = (uint8_t *)&ipv4;
985 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
986 0, CTDB_CONTROL_RELEASE_IPv4, 0,
990 ip.pnn = tmp_ip->pnn;
991 ip.addr = tmp_ip->addr;
993 timeout = TAKEOVER_TIMEOUT();
994 data.dsize = sizeof(ip);
995 data.dptr = (uint8_t *)&ip;
996 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
997 0, CTDB_CONTROL_RELEASE_IP, 0,
1002 if (state == NULL) {
1003 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1004 talloc_free(tmp_ctx);
1008 ctdb_client_async_add(async_data, state);
1011 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1012 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1013 talloc_free(tmp_ctx);
1016 talloc_free(async_data);
1019 /* tell all nodes to get their own IPs */
1020 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1021 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1022 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1023 if (tmp_ip->pnn == -1) {
1024 /* this IP won't be taken over */
1028 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1029 ipv4.pnn = tmp_ip->pnn;
1030 ipv4.sin = tmp_ip->addr.ip;
1032 timeout = TAKEOVER_TIMEOUT();
1033 data.dsize = sizeof(ipv4);
1034 data.dptr = (uint8_t *)&ipv4;
1035 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1036 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1040 ip.pnn = tmp_ip->pnn;
1041 ip.addr = tmp_ip->addr;
1043 timeout = TAKEOVER_TIMEOUT();
1044 data.dsize = sizeof(ip);
1045 data.dptr = (uint8_t *)&ip;
1046 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1047 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1051 if (state == NULL) {
1052 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1053 talloc_free(tmp_ctx);
1057 ctdb_client_async_add(async_data, state);
1059 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1060 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1061 talloc_free(tmp_ctx);
1065 talloc_free(tmp_ctx);
1071 destroy a ctdb_client_ip structure
/* talloc destructor installed by ctdb_control_tcp_client: logs the entry
   at DEBUG_DEBUG and unlinks it from ip->ctdb->client_ip_list. */
1073 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1075 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1076 ctdb_addr_to_str(&ip->addr),
1077 ntohs(ip->addr.ip.sin_port),
1080 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1085 called by a client to inform us of a TCP connection that it is managing
1086 that should be tickled with an ACK when IP takeover is done
1087 we handle both the old ipv4 style of packets as well as the new ipv4/6
/*
  Register a tcp connection on behalf of a local client.
  The payload size selects the format: sizeof(struct ctdb_control_tcp) is
  the old ipv4 layout, which is copied into a zeroed
  ctdb_control_tcp_addr, and sizeof(struct ctdb_control_tcp_addr) is used
  in place; any other size is reported at DEBUG_ERR. Both endpoints are
  passed through ctdb_canonicalize_ip, the destination is looked up with
  find_public_ip_vnn, and the vnn pnn is compared with ctdb->pnn (a
  mismatch is logged and, per the existing comment, fails the call).
  A ctdb_client_ip entry (with ctdb_client_ip_destructor installed) and a
  ctdb_tcp_list entry are then allocated on the client and linked onto
  ctdb->client_ip_list and client->tcp_list, and a CTDB_CONTROL_TCP_ADD
  carrying both endpoints is broadcast to CTDB_BROADCAST_CONNECTED with
  CTDB_CTRL_FLAG_NOREPLY.
  NOTE(review): the result of ctdb_reqid_find is used (client->pid, talloc
  parent) with no NULL check visible in this view - confirm.
*/
1090 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1093 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1094 struct ctdb_control_tcp *old_addr = NULL;
1095 struct ctdb_control_tcp_addr new_addr;
1096 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1097 struct ctdb_tcp_list *tcp;
1098 struct ctdb_control_tcp_vnn t;
1101 struct ctdb_client_ip *ip;
1102 struct ctdb_vnn *vnn;
1103 ctdb_sock_addr addr;
1105 switch (indata.dsize) {
1106 case sizeof(struct ctdb_control_tcp):
1107 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1108 ZERO_STRUCT(new_addr);
1109 tcp_sock = &new_addr;
1110 tcp_sock->src.ip = old_addr->src;
1111 tcp_sock->dest.ip = old_addr->dest;
1113 case sizeof(struct ctdb_control_tcp_addr):
1114 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1117 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1118 "to ctdb_control_tcp_client. size was %d but "
1119 "only allowed sizes are %lu and %lu\n",
1121 (long unsigned)sizeof(struct ctdb_control_tcp),
1122 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1126 addr = tcp_sock->src;
1127 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1128 addr = tcp_sock->dest;
1129 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1132 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1133 vnn = find_public_ip_vnn(ctdb, &addr);
1135 switch (addr.sa.sa_family) {
1137 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1138 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1139 ctdb_addr_to_str(&addr)));
1143 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1144 ctdb_addr_to_str(&addr)));
1147 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1153 if (vnn->pnn != ctdb->pnn) {
1154 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1155 ctdb_addr_to_str(&addr),
1156 client_id, client->pid));
1157 /* failing this call will tell smbd to die */
1161 ip = talloc(client, struct ctdb_client_ip);
1162 CTDB_NO_MEMORY(ctdb, ip);
1166 ip->client_id = client_id;
1167 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1168 DLIST_ADD(ctdb->client_ip_list, ip);
1170 tcp = talloc(client, struct ctdb_tcp_list);
1171 CTDB_NO_MEMORY(ctdb, tcp);
1173 tcp->connection.src_addr = tcp_sock->src;
1174 tcp->connection.dst_addr = tcp_sock->dest;
1176 DLIST_ADD(client->tcp_list, tcp);
1178 t.src = tcp_sock->src;
1179 t.dest = tcp_sock->dest;
1181 data.dptr = (uint8_t *)&t;
1182 data.dsize = sizeof(t);
1184 switch (addr.sa.sa_family) {
1186 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1187 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1188 ctdb_addr_to_str(&tcp_sock->src),
1189 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1192 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1193 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1194 ctdb_addr_to_str(&tcp_sock->src),
1195 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1198 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1202 /* tell all nodes about this tcp connection */
1203 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1204 CTDB_CONTROL_TCP_ADD,
1205 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1207 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1215 find a tcp address on a list
/* Linear search of array for a connection whose source and destination
   both match tcp according to ctdb_same_sockaddr; a match is returned as
   a pointer into array->connections. A NULL array is tested for first.
   Callers in this file compare the result against NULL. */
1217 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1218 struct ctdb_tcp_connection *tcp)
1222 if (array == NULL) {
1226 for (i=0;i<array->num;i++) {
1227 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1228 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1229 return &array->connections[i];
1236 called by a daemon to inform us of a TCP connection that one of its
1237 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Record a tcp connection (tickle) against the public address it targets.
  indata.dptr is read as a struct ctdb_control_tcp_vnn and the destination
  is looked up with find_public_ip_vnn; an unknown address is logged at
  DEBUG_INFO. When the vnn has no tcp_array yet, one is allocated on
  ctdb->nodes together with a one-element connections array, and the
  connection is stored. Otherwise ctdb_tcp_find is used to detect a
  duplicate (logged at DEBUG_DEBUG), and a new connection is appended after
  growing connections with talloc_realloc.
  NOTE(review): the initialisation and increment of tcparray->num are not
  visible in this view; confirm num is set before it is used as an index in
  the first-tickle path.
*/
1240 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1242 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1243 struct ctdb_tcp_array *tcparray;
1244 struct ctdb_tcp_connection tcp;
1245 struct ctdb_vnn *vnn;
1247 vnn = find_public_ip_vnn(ctdb, &p->dest);
1249 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1250 ctdb_addr_to_str(&p->dest)));
1256 tcparray = vnn->tcp_array;
1258 /* If this is the first tickle */
1259 if (tcparray == NULL) {
1260 tcparray = talloc_size(ctdb->nodes,
1261 offsetof(struct ctdb_tcp_array, connections) +
1262 sizeof(struct ctdb_tcp_connection) * 1);
1263 CTDB_NO_MEMORY(ctdb, tcparray);
1264 vnn->tcp_array = tcparray;
1267 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1268 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1270 tcparray->connections[tcparray->num].src_addr = p->src;
1271 tcparray->connections[tcparray->num].dst_addr = p->dest;
1277 /* Do we already have this tickle ?*/
1278 tcp.src_addr = p->src;
1279 tcp.dst_addr = p->dest;
1280 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1281 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1282 ctdb_addr_to_str(&tcp.dst_addr),
1283 ntohs(tcp.dst_addr.ip.sin_port),
1288 /* A new tickle, we must add it to the array */
1289 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1290 struct ctdb_tcp_connection,
1292 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1294 vnn->tcp_array = tcparray;
1295 tcparray->connections[tcparray->num].src_addr = p->src;
1296 tcparray->connections[tcparray->num].dst_addr = p->dest;
1299 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1300 ctdb_addr_to_str(&tcp.dst_addr),
1301 ntohs(tcp.dst_addr.ip.sin_port),
1309 called by a daemon to inform us of a TCP connection that one of its
1310 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Remove one tcp connection from the tickle array of the public address it
  targets. The vnn is found from conn->dst_addr with find_public_ip_vnn; an
  unknown address, an empty array and an unknown connection are each
  logged. A known entry is overwritten with the last element of the array
  and num is decremented; when num reaches 0 the array is freed and the vnn
  pointer cleared. tcp_update_needed is then set on the vnn.
*/
1313 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1315 struct ctdb_tcp_connection *tcpp;
1316 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1319 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1320 ctdb_addr_to_str(&conn->dst_addr)));
1324 /* if the array is empty we cant remove it
1325 and we dont need to do anything
1327 if (vnn->tcp_array == NULL) {
1328 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1329 ctdb_addr_to_str(&conn->dst_addr),
1330 ntohs(conn->dst_addr.ip.sin_port)));
1335 /* See if we know this connection
1336 if we dont know this connection then we dont need to do anything
1338 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1340 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1341 ctdb_addr_to_str(&conn->dst_addr),
1342 ntohs(conn->dst_addr.ip.sin_port)));
1347 /* We need to remove this entry from the array.
1348 Instead of allocating a new array and copying data to it
1349 we cheat and just copy the last entry in the existing array
1350 to the entry that is to be removed and just shring the
1353 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1354 vnn->tcp_array->num--;
1356 /* If we deleted the last entry we also need to remove the entire array
1358 if (vnn->tcp_array->num == 0) {
1359 talloc_free(vnn->tcp_array);
1360 vnn->tcp_array = NULL;
1363 vnn->tcp_update_needed = true;
1365 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1366 ctdb_addr_to_str(&conn->src_addr),
1367 ntohs(conn->src_addr.ip.sin_port)));
1372 called when a daemon restarts - send all tickles for all public addresses
1373 we are serving immediately to the new node.
/* Handler for the startup control. The only visible content is a marker
   noting that sending the served tickles to the new node is still to be
   implemented. */
1375 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1377 /*XXX here we should send all tickles we are serving to the new node */
1383 called when a client structure goes away - hook to remove
1384 elements from the tcp_list in all daemons
/* Drains client->tcp_list: each entry is unlinked with DLIST_REMOVE and
   its connection is passed to ctdb_remove_tcp_connection.
   NOTE(review): no free of the list entry is visible in this view. */
1386 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1388 while (client->tcp_list) {
1389 struct ctdb_tcp_list *tcp = client->tcp_list;
1390 DLIST_REMOVE(client->tcp_list, tcp);
1391 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1397 release all IPs on shutdown
/* Walks ctdb->vnn; each address is tested with ctdb_sys_have_ip and its
   pnn is compared with ctdb->pnn. For addresses being released the
   CTDB_EVENT_RELEASE_IP script is run through ctdb_event_script_args and
   release_kill_clients is called.
   NOTE(review): the address string is duplicated onto ctdb and no free is
   visible in this view. */
1399 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1401 struct ctdb_vnn *vnn;
1403 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1404 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1407 if (vnn->pnn == ctdb->pnn) {
1410 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1412 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1413 vnn->public_netmask_bits);
1414 release_kill_clients(ctdb, &vnn->public_address);
1420 get list of public IPs
1422 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1423 struct ctdb_req_control *c, TDB_DATA *outdata)
1426 struct ctdb_all_public_ips *ips;
1427 struct ctdb_vnn *vnn;
1429 /* count how many public ip structures we have */
1431 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1435 len = offsetof(struct ctdb_all_public_ips, ips) +
1436 num*sizeof(struct ctdb_public_ip);
1437 ips = talloc_zero_size(outdata, len);
1438 CTDB_NO_MEMORY(ctdb, ips);
1440 outdata->dsize = len;
1441 outdata->dptr = (uint8_t *)ips;
1445 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1446 ips->ips[i].pnn = vnn->pnn;
1447 ips->ips[i].addr = vnn->public_address;
1456 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1458 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1459 struct ctdb_req_control *c, TDB_DATA *outdata)
1462 struct ctdb_all_public_ipsv4 *ips;
1463 struct ctdb_vnn *vnn;
1465 /* count how many public ip structures we have */
1467 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1468 if (vnn->public_address.sa.sa_family != AF_INET) {
1474 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1475 num*sizeof(struct ctdb_public_ipv4);
1476 ips = talloc_zero_size(outdata, len);
1477 CTDB_NO_MEMORY(ctdb, ips);
1479 outdata->dsize = len;
1480 outdata->dptr = (uint8_t *)ips;
1484 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1485 if (vnn->public_address.sa.sa_family != AF_INET) {
1488 ips->ips[i].pnn = vnn->pnn;
1489 ips->ips[i].sin = vnn->public_address.ip;
1498 structure containing the listening socket and the list of tcp connections
1499 that the ctdb daemon is to kill
1501 struct ctdb_kill_tcp {
1502 struct ctdb_vnn *vnn;
1503 struct ctdb_context *ctdb;
1505 struct fd_event *fde;
1506 trbt_tree_t *connections;
1511 a tcp connection that is to be killed
1513 struct ctdb_killtcp_con {
1514 ctdb_sock_addr src_addr;
1515 ctdb_sock_addr dst_addr;
1517 struct ctdb_kill_tcp *killtcp;
1520 /* this function is used to create a key to represent this socketpair
1521 in the killtcp tree.
1522 this key is used to insert and lookup matching socketpairs that are
1523 to be tickled and RST
1525 #define KILLTCP_KEYLEN 10
1526 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1528 static uint32_t key[KILLTCP_KEYLEN];
1530 bzero(key, sizeof(key));
1532 if (src->sa.sa_family != dst->sa.sa_family) {
1533 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1537 switch (src->sa.sa_family) {
1539 key[0] = dst->ip.sin_addr.s_addr;
1540 key[1] = src->ip.sin_addr.s_addr;
1541 key[2] = dst->ip.sin_port;
1542 key[3] = src->ip.sin_port;
1545 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1546 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1547 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1548 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1549 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1550 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1551 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1552 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1553 key[8] = dst->ip6.sin6_port;
1554 key[9] = src->ip6.sin6_port;
1557 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1565 called when we get a read event on the raw socket
1567 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1568 uint16_t flags, void *private_data)
1570 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1571 struct ctdb_killtcp_con *con;
1572 ctdb_sock_addr src, dst;
1573 uint32_t ack_seq, seq;
1575 if (!(flags & EVENT_FD_READ)) {
1579 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1580 killtcp->private_data,
1582 &ack_seq, &seq) != 0) {
1583 /* probably a non-tcp ACK packet */
1587 /* check if we have this guy in our list of connections
1590 con = trbt_lookuparray32(killtcp->connections,
1591 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1593 /* no this was some other packet we can just ignore */
1597 /* This one has been tickled !
1598 now reset him and remove him from the list.
1600 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1601 ntohs(con->dst_addr.ip.sin_port),
1602 ctdb_addr_to_str(&con->src_addr),
1603 ntohs(con->src_addr.ip.sin_port)));
1605 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1610 /* when traversing the list of all tcp connections to send tickle acks to
1611 (so that we can capture the ack coming back and kill the connection
1613 this callback is called for each connection we are currently trying to kill
1615 static void tickle_connection_traverse(void *param, void *data)
1617 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1619 /* have tried too many times, just give up */
1620 if (con->count >= 5) {
1621 /* can't delete in traverse: reparent to delete_cons */
1622 talloc_steal(param, con);
1626 /* othervise, try tickling it again */
1629 (ctdb_sock_addr *)&con->dst_addr,
1630 (ctdb_sock_addr *)&con->src_addr,
1636 called every second until all sentenced connections have been reset
1638 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1639 struct timeval t, void *private_data)
1641 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1642 void *delete_cons = talloc_new(NULL);
1644 /* loop over all connections sending tickle ACKs */
1645 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
1647 /* now we've finished traverse, it's safe to do deletion. */
1648 talloc_free(delete_cons);
1650 /* If there are no more connections to kill we can remove the
1651 entire killtcp structure
1653 if ( (killtcp->connections == NULL) ||
1654 (killtcp->connections->root == NULL) ) {
1655 talloc_free(killtcp);
1659 /* try tickling them again in a seconds time
1661 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1662 ctdb_tickle_sentenced_connections, killtcp);
1666 destroy the killtcp structure
1668 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1671 killtcp->vnn->killtcp = NULL;
/*
  Insert callback for the killtcp tree.

  Nothing fancy: unconditionally replace any existing connection
  structure with the new one by returning parm.  The old entry (data)
  is deliberately not freed here.
 */
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	(void)data;

	return replacement;
}
1690 add a tcp socket to the list of connections we want to RST
1692 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1696 ctdb_sock_addr src, dst;
1697 struct ctdb_kill_tcp *killtcp;
1698 struct ctdb_killtcp_con *con;
1699 struct ctdb_vnn *vnn;
1701 ctdb_canonicalize_ip(s, &src);
1702 ctdb_canonicalize_ip(d, &dst);
1704 vnn = find_public_ip_vnn(ctdb, &dst);
1706 vnn = find_public_ip_vnn(ctdb, &src);
1709 /* if it is not a public ip it could be our 'single ip' */
1710 if (ctdb->single_ip_vnn) {
1711 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1712 vnn = ctdb->single_ip_vnn;
1717 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1721 killtcp = vnn->killtcp;
1723 /* If this is the first connection to kill we must allocate
1726 if (killtcp == NULL) {
1727 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1728 CTDB_NO_MEMORY(ctdb, killtcp);
1731 killtcp->ctdb = ctdb;
1732 killtcp->capture_fd = -1;
1733 killtcp->connections = trbt_create(killtcp, 0);
1735 vnn->killtcp = killtcp;
1736 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1741 /* create a structure that describes this connection we want to
1742 RST and store it in killtcp->connections
1744 con = talloc(killtcp, struct ctdb_killtcp_con);
1745 CTDB_NO_MEMORY(ctdb, con);
1746 con->src_addr = src;
1747 con->dst_addr = dst;
1749 con->killtcp = killtcp;
1752 trbt_insertarray32_callback(killtcp->connections,
1753 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1754 add_killtcp_callback, con);
1757 If we dont have a socket to listen on yet we must create it
1759 if (killtcp->capture_fd == -1) {
1760 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1761 if (killtcp->capture_fd == -1) {
1762 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1768 if (killtcp->fde == NULL) {
1769 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1770 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1771 capture_tcp_handler, killtcp);
1773 /* We also need to set up some events to tickle all these connections
1774 until they are all reset
1776 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1777 ctdb_tickle_sentenced_connections, killtcp);
1780 /* tickle him once now */
1789 talloc_free(vnn->killtcp);
1790 vnn->killtcp = NULL;
1795 kill a TCP connection.
1797 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1799 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1801 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1805 called by a daemon to inform us of the entire list of TCP tickles for
1806 a particular public address.
1807 this control should only be sent by the node that is currently serving
1808 that public address.
1810 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1812 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1813 struct ctdb_tcp_array *tcparray;
1814 struct ctdb_vnn *vnn;
1816 /* We must at least have tickles.num or else we cant verify the size
1817 of the received data blob
1819 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1820 tickles.connections)) {
1821 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1825 /* verify that the size of data matches what we expect */
1826 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1827 tickles.connections)
1828 + sizeof(struct ctdb_tcp_connection)
1829 * list->tickles.num) {
1830 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1834 vnn = find_public_ip_vnn(ctdb, &list->addr);
1836 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1837 ctdb_addr_to_str(&list->addr)));
1842 /* remove any old ticklelist we might have */
1843 talloc_free(vnn->tcp_array);
1844 vnn->tcp_array = NULL;
1846 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1847 CTDB_NO_MEMORY(ctdb, tcparray);
1849 tcparray->num = list->tickles.num;
1851 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1852 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1854 memcpy(tcparray->connections, &list->tickles.connections[0],
1855 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1857 /* We now have a new fresh tickle list array for this vnn */
1858 vnn->tcp_array = talloc_steal(vnn, tcparray);
1864 called to return the full list of tickles for the puclic address associated
1865 with the provided vnn
1867 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1869 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1870 struct ctdb_control_tcp_tickle_list *list;
1871 struct ctdb_tcp_array *tcparray;
1873 struct ctdb_vnn *vnn;
1875 vnn = find_public_ip_vnn(ctdb, addr);
1877 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1878 ctdb_addr_to_str(addr)));
1883 tcparray = vnn->tcp_array;
1885 num = tcparray->num;
1890 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1891 tickles.connections)
1892 + sizeof(struct ctdb_tcp_connection) * num;
1894 outdata->dptr = talloc_size(outdata, outdata->dsize);
1895 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1896 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1899 list->tickles.num = num;
1901 memcpy(&list->tickles.connections[0], tcparray->connections,
1902 sizeof(struct ctdb_tcp_connection) * num);
1910 set the list of all tcp tickles for a public address
1912 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1913 struct timeval timeout, uint32_t destnode,
1914 ctdb_sock_addr *addr,
1915 struct ctdb_tcp_array *tcparray)
1919 struct ctdb_control_tcp_tickle_list *list;
1922 num = tcparray->num;
1927 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1928 tickles.connections) +
1929 sizeof(struct ctdb_tcp_connection) * num;
1930 data.dptr = talloc_size(ctdb, data.dsize);
1931 CTDB_NO_MEMORY(ctdb, data.dptr);
1933 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1935 list->tickles.num = num;
1937 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1940 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1941 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1942 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1944 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1948 talloc_free(data.dptr);
1955 perform tickle updates if required
1957 static void ctdb_update_tcp_tickles(struct event_context *ev,
1958 struct timed_event *te,
1959 struct timeval t, void *private_data)
1961 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1963 struct ctdb_vnn *vnn;
1965 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1966 /* we only send out updates for public addresses that
1969 if (ctdb->pnn != vnn->pnn) {
1972 /* We only send out the updates if we need to */
1973 if (!vnn->tcp_update_needed) {
1976 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1978 CTDB_BROADCAST_CONNECTED,
1979 &vnn->public_address,
1982 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1983 ctdb_addr_to_str(&vnn->public_address)));
1987 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1988 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1989 ctdb_update_tcp_tickles, ctdb);
1994 start periodic update of tcp tickles
1996 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1998 ctdb->tickle_update_context = talloc_new(ctdb);
2000 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2001 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2002 ctdb_update_tcp_tickles, ctdb);
2008 struct control_gratious_arp {
2009 struct ctdb_context *ctdb;
2010 ctdb_sock_addr addr;
2016 send a control_gratuitous arp
2018 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2019 struct timeval t, void *private_data)
2022 struct control_gratious_arp *arp = talloc_get_type(private_data,
2023 struct control_gratious_arp);
2025 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2027 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2032 if (arp->count == CTDB_ARP_REPEAT) {
2037 event_add_timed(arp->ctdb->ev, arp,
2038 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2039 send_gratious_arp, arp);
2046 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2048 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2049 struct control_gratious_arp *arp;
2051 /* verify the size of indata */
2052 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2053 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2054 (unsigned)indata.dsize,
2055 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2059 ( offsetof(struct ctdb_control_gratious_arp, iface)
2060 + gratious_arp->len ) ){
2062 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2063 "but should be %u bytes\n",
2064 (unsigned)indata.dsize,
2065 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2070 arp = talloc(ctdb, struct control_gratious_arp);
2071 CTDB_NO_MEMORY(ctdb, arp);
2074 arp->addr = gratious_arp->addr;
2075 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2076 CTDB_NO_MEMORY(ctdb, arp->iface);
2079 event_add_timed(arp->ctdb->ev, arp,
2080 timeval_zero(), send_gratious_arp, arp);
2085 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2087 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2090 /* verify the size of indata */
2091 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2092 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2096 ( offsetof(struct ctdb_control_ip_iface, iface)
2099 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2100 "but should be %u bytes\n",
2101 (unsigned)indata.dsize,
2102 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2106 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2109 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
  Called when the releaseip event finishes for del_public_address.
  private_data is the talloc context created for that request; freeing
  it is all that is left to do.  The script status is not examined.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
				void *private_data)
{
	(void)ctdb;
	(void)status;

	talloc_free(private_data);
}
2125 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2127 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2128 struct ctdb_vnn *vnn;
2131 /* verify the size of indata */
2132 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2133 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2137 ( offsetof(struct ctdb_control_ip_iface, iface)
2140 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2141 "but should be %u bytes\n",
2142 (unsigned)indata.dsize,
2143 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2147 /* walk over all public addresses until we find a match */
2148 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2149 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2150 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2152 DLIST_REMOVE(ctdb->vnn, vnn);
2154 ret = ctdb_event_script_callback(ctdb,
2155 mem_ctx, delete_ip_callback, mem_ctx,
2157 CTDB_EVENT_RELEASE_IP,
2160 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2161 vnn->public_netmask_bits);
2163 vnn->killtcp->vnn = NULL;
2176 /* This function is called from the recovery daemon to verify that a remote
2177 node has the expected ip allocation.
2178 This is verified against ctdb->ip_tree
2180 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2182 struct ctdb_public_ip_list *tmp_ip;
2185 if (ctdb->ip_tree == NULL) {
2186 /* dont know the expected allocation yet, assume remote node
2195 for (i=0; i<ips->num; i++) {
2196 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2197 if (tmp_ip == NULL) {
2198 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2202 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2206 if (tmp_ip->pnn != ips->ips[i].pnn) {
2207 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2215 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2217 struct ctdb_public_ip_list *tmp_ip;
2219 if (ctdb->ip_tree == NULL) {
2220 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2224 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2225 if (tmp_ip == NULL) {
2226 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2230 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2231 tmp_ip->pnn = ip->pnn;