4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout used for the takeover/release controls, built from the
   takeover_timeout tunable. The expansion refers to a variable named
   `ctdb`, so one must be in scope wherever the macro is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument given to timeval_current_ofs() when the ARP/tickle sender
   re-arms itself (presumably seconds - confirm against timeval_current_ofs) */
33 #define CTDB_ARP_INTERVAL 1
/* Value the sender's counter (arp->count) is compared against */
34 #define CTDB_ARP_REPEAT 3
/* Private state of the timed ARP / tcp-tickle sender ctdb_control_send_arp().
   NOTE(review): only part of the member list is visible in this excerpt; the
   sender also reads ->addr, ->vnn and ->count. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
/* connections to tickle; taken over from vnn->tcp_array in takeover_ip_callback() */
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked list node holding one tcp connection;
   ctdb_control_tcp_client() links one onto client->tcp_list for every
   connection a client registers. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Entry of ctdb->client_ip_list, linking a client to an address it has
   registered a connection for.
   NOTE(review): the ->addr and ->client_id members used elsewhere in this
   file are not visible in this excerpt. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
/* back pointer used by the destructor to unlink from ctdb->client_ip_list */
58 struct ctdb_context *ctdb;
/* Timed-event handler that advertises a taken-over public address.
   private_data is the struct ctdb_takeover_arp prepared by
   takeover_ip_callback(). Each run sends an ARP for arp->addr on the vnn's
   interface, walks arp->tcparray issuing a tcp tickle ack per connection and
   then schedules itself again on arp->vnn->takeover_ctx. Send failures are
   reported at DEBUG_CRIT. */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
/* announce the address on the interface that belongs to this vnn */
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
/* NOTE(review): presumably where the repeat limit ends the cycle; the body
   of this test is not visible in this excerpt */
103 if (arp->count == CTDB_ARP_REPEAT) {
/* re-arm on the vnn's takeover_ctx; freeing that context (see
   ctdb_control_release_ip) is what stops any previous arps */
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
/* State handed to the takeip / releaseip event-script callbacks. */
113 struct takeover_callback_state {
/* control request that is answered once the event script has finished */
114 struct ctdb_req_control *c;
/* separately allocated copy of the public address being handled */
115 ctdb_sock_addr *addr;
/* public-address entry the request refers to (read by takeover_ip_callback) */
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/* Completion callback for the takeip event script; private_data is the
   takeover_callback_state created by ctdb_control_takeover_ip().
   On the failure path the error is logged and the pending control is
   answered with the script's status. Otherwise a ctdb_takeover_arp is
   allocated under vnn->takeover_ctx (created on demand), it takes over the
   vnn's tcp_array, ctdb_control_send_arp() is scheduled with a zero timeout
   and the control is answered with 0. The final reply with -1 is presumably
   the target of the "goto failed" allocation-failure path - confirm. */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 if (status == -ETIME) {
134 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135 ctdb_addr_to_str(state->addr),
137 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
/* takeover_ctx is the talloc parent of everything ARP related for this vnn */
142 if (!state->vnn->takeover_ctx) {
143 state->vnn->takeover_ctx = talloc_new(state->vnn);
144 if (!state->vnn->takeover_ctx) {
149 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150 if (!arp) goto failed;
153 arp->addr = *state->addr;
154 arp->vnn = state->vnn;
156 tcparray = state->vnn->tcp_array;
158 /* add all of the known tcp connections for this IP to the
159 list of tcp connections to send tickle acks for */
160 arp->tcparray = talloc_steal(arp, tcparray);
/* the array now belongs to arp, so the vnn must no longer point at it */
162 state->vnn->tcp_array = NULL;
163 state->vnn->tcp_update_needed = true;
/* zero timeout: the first ARP/tickle round is queued to run right away */
166 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
167 timeval_zero(), ctdb_control_send_arp, arp);
169 /* the control succeeded */
170 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
175 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
181 Find the vnn of the node that has a public ip address
182 returns NULL if the address is not known as a public address
/* Walk the ctdb->vnn list looking for the entry whose public_address matches
   addr according to ctdb_same_ip().
   NOTE(review): the return statements are not visible in this excerpt;
   callers use the result as a struct ctdb_vnn pointer. */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
186 struct ctdb_vnn *vnn;
188 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189 if (ctdb_same_ip(&vnn->public_address, addr)) {
199 take over an ip address
/* Control handler: take over the public address carried in indata
   (a struct ctdb_public_ip). The address is looked up in the vnn list,
   nothing further is started when the kernel already has it, otherwise the
   takeip event script is launched with takeover_ip_callback() as completion
   callback and the control is answered asynchronously from there.
   NOTE(review): state->c and state->addr are parented to ctdb here, whereas
   ctdb_control_release_ip() parents both to state; unless they are released
   elsewhere they outlive state - confirm this is intended. */
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
202 struct ctdb_req_control *c,
207 struct takeover_callback_state *state;
208 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209 struct ctdb_vnn *vnn;
211 /* update our vnn list */
212 vnn = find_public_ip_vnn(ctdb, &pip->addr);
214 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
215 ctdb_addr_to_str(&pip->addr)));
220 /* if our kernel already has this IP, do nothing */
221 if (ctdb_sys_have_ip(&pip->addr)) {
/* the callback state hangs off the vnn */
225 state = talloc(vnn, struct takeover_callback_state);
226 CTDB_NO_MEMORY(ctdb, state);
228 state->c = talloc_steal(ctdb, c);
229 state->addr = talloc(ctdb, ctdb_sock_addr);
230 CTDB_NO_MEMORY(ctdb, state->addr);
232 *state->addr = pip->addr;
235 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
236 ctdb_addr_to_str(&pip->addr),
237 vnn->public_netmask_bits,
240 ret = ctdb_event_script_callback(ctdb,
241 state, takeover_ip_callback, state,
246 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247 vnn->public_netmask_bits);
250 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251 ctdb_addr_to_str(&pip->addr),
257 /* tell ctdb_control.c that we will be replying asynchronously */
264 takeover an ip address old v4 style
/* Legacy IPv4-style entry point for taking over an address. A zeroed
   struct ctdb_public_ip is allocated on the request, the caller's payload is
   copied over the start of it and the result is forwarded to
   ctdb_control_takeover_ip(). */
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
267 struct ctdb_req_control *c,
273 data.dsize = sizeof(struct ctdb_public_ip);
274 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is the caller-supplied indata.dsize while
   the destination holds sizeof(struct ctdb_public_ip) bytes; no bound check
   is visible here - confirm the size is validated before this point */
277 memcpy(data.dptr, indata.dptr, indata.dsize);
278 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
282 kill any clients that are registered with an IP that is being released
/* Walk ctdb->client_ip_list and, for every entry that matches addr
   (ctdb_same_ip), look up the owning client and send SIGKILL to its pid
   when that pid is non-zero.
   NOTE(review): how tmp_addr is filled in is not visible in this excerpt.
   NOTE(review): no NULL check on the result of ctdb_reqid_find() is visible
   before client->pid is read - confirm a stale client id cannot reach it. */
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
286 struct ctdb_client_ip *ip;
288 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289 ctdb_addr_to_str(addr)));
291 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292 ctdb_sock_addr tmp_addr;
295 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
297 ctdb_addr_to_str(&ip->addr)));
299 if (ctdb_same_ip(&tmp_addr, addr)) {
300 struct ctdb_client *client = ctdb_reqid_find(ctdb,
303 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
305 ctdb_addr_to_str(&ip->addr),
308 if (client->pid != 0) {
309 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310 (unsigned)client->pid,
311 ctdb_addr_to_str(addr),
313 kill(client->pid, SIGKILL);
320 called when releaseip event finishes
/* Completion callback for the releaseip event script; private_data is the
   takeover_callback_state created by ctdb_control_release_ip().
   Sends the released address, as a NUL-terminated string, to
   CTDB_SRVID_RELEASE_IP on this node, kills the clients registered for the
   address and answers the pending control with 0.
   NOTE(review): if the talloc_strdup below fails, CTDB_NO_MEMORY_VOID
   presumably returns before the control is answered - confirm. */
322 static void release_ip_callback(struct ctdb_context *ctdb, int status,
325 struct takeover_callback_state *state =
326 talloc_get_type(private_data, struct takeover_callback_state);
329 if (status == -ETIME) {
333 /* send a message to all clients of this node telling them
334 that the cluster has been reconfigured and they should
335 release any sockets on this IP */
336 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
/* +1 so that the terminating NUL is part of the message */
338 data.dsize = strlen((char *)data.dptr)+1;
340 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
342 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
344 /* kill clients that have registered with this IP */
345 release_kill_clients(ctdb, state->addr);
347 /* the control succeeded */
348 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
353 release an ip address
/* Control handler: release the public address carried in indata
   (a struct ctdb_public_ip). Any ARP/tickle activity still pending for the
   vnn is cancelled by freeing its takeover_ctx. When the kernel still holds
   the address, the releaseip event script is launched with
   release_ip_callback() as completion callback and the control is answered
   asynchronously; a release of an address that is not held is only logged
   as redundant. */
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
356 struct ctdb_req_control *c,
361 struct takeover_callback_state *state;
362 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363 struct ctdb_vnn *vnn;
365 /* update our vnn list */
366 vnn = find_public_ip_vnn(ctdb, &pip->addr);
368 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369 ctdb_addr_to_str(&pip->addr)));
374 /* stop any previous arps */
375 talloc_free(vnn->takeover_ctx);
376 vnn->takeover_ctx = NULL;
378 if (!ctdb_sys_have_ip(&pip->addr)) {
379 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
380 ctdb_addr_to_str(&pip->addr),
381 vnn->public_netmask_bits,
386 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
387 ctdb_addr_to_str(&pip->addr),
388 vnn->public_netmask_bits,
/* unlike the takeover path, the request and the address copy are both
   parented to state here */
392 state = talloc(ctdb, struct takeover_callback_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(state, c);
396 state->addr = talloc(state, ctdb_sock_addr);
397 CTDB_NO_MEMORY(ctdb, state->addr);
398 *state->addr = pip->addr;
401 ret = ctdb_event_script_callback(ctdb,
402 state, release_ip_callback, state,
404 CTDB_EVENT_RELEASE_IP,
407 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411 ctdb_addr_to_str(&pip->addr),
417 /* tell the control that we will be reply asynchronously */
423 release an ip address old v4 style
/* Legacy IPv4-style entry point for releasing an address. A zeroed
   struct ctdb_public_ip is allocated on the request, the caller's payload is
   copied over the start of it and the result is forwarded to
   ctdb_control_release_ip(). */
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
426 struct ctdb_req_control *c,
432 data.dsize = sizeof(struct ctdb_public_ip);
433 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is the caller-supplied indata.dsize while
   the destination holds sizeof(struct ctdb_public_ip) bytes; no bound check
   is visible here - confirm the size is validated before this point */
436 memcpy(data.dptr, indata.dptr, indata.dsize);
437 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/* Append one public address to ctdb->vnn.
   addr/mask/iface describe the address, its netmask width in bits and the
   interface name (copied). An address already present in the list, as judged
   by ctdb_same_sockaddr(), is reported at DEBUG_CRIT as a duplicate.
   ctdb_set_public_addresses() treats a non-zero return as failure.
   NOTE(review): if the iface strdup fails, CTDB_NO_MEMORY presumably returns
   while the freshly allocated vnn stays parented to ctdb - confirm. */
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
443 struct ctdb_vnn *vnn;
445 /* Verify that we dont have an entry for this ip yet */
446 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
449 ctdb_addr_to_str(addr)));
454 /* create a new vnn structure for this ip address */
455 vnn = talloc_zero(ctdb, struct ctdb_vnn);
456 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457 vnn->iface = talloc_strdup(vnn, iface);
458 CTDB_NO_MEMORY(ctdb, vnn->iface);
459 vnn->public_address = *addr;
460 vnn->public_netmask_bits = mask;
463 DLIST_ADD(ctdb->vnn, vnn);
470 setup the event script directory
/* Store a private copy of script_dir (allocated on ctdb) in
   ctdb->event_script_dir; an allocation failure is handled by
   CTDB_NO_MEMORY. */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
474 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
480 setup the public address lists from a file
/* Populate the public address list from the file named by alist.
   The file is loaded with file_lines_load(), trailing empty lines are
   dropped, and each remaining line is stripped of leading blanks, skipped
   when empty and otherwise split on spaces/tabs with strtok(). The address
   string is parsed by parse_ip_mask() and the result is added with
   ctdb_add_public_address(). ctdb->default_public_interface is consulted
   for the interface, presumably when the line does not name one (the
   guarding test is not visible in this excerpt).
   Note that strtok() modifies the line buffer and keeps static state. */
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
488 lines = file_lines_load(alist, &nlines, ctdb);
490 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
/* ignore empty lines at the end of the file */
493 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
497 for (i=0;i<nlines;i++) {
/* skip leading spaces and tabs */
505 while ((*line == ' ') || (*line == '\t')) {
511 if (strcmp(line, "") == 0) {
514 tok = strtok(line, " \t");
516 tok = strtok(NULL, " \t");
518 if (NULL == ctdb->default_public_interface) {
519 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
524 iface = ctdb->default_public_interface;
529 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
534 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Element of the singly linked, cluster-wide list of public addresses that
   create_merged_ip_list() builds.
   NOTE(review): the ->pnn and ->addr members used by the allocation code are
   not visible in this excerpt. */
548 struct ctdb_public_ip_list {
549 struct ctdb_public_ip_list *next;
555 /* Given a physical node, return the number of
556 public addresses that are currently assigned to this node.
/* Walk the ips list and look at every entry whose pnn equals the given
   node number; the callers use the result as the number of addresses
   currently assigned to that node (the counting statement itself is not
   visible in this excerpt). */
558 static int node_ip_coverage(struct ctdb_context *ctdb,
560 struct ctdb_public_ip_list *ips)
564 for (;ips;ips=ips->next) {
565 if (ips->pnn == pnn) {
573 /* Check if this is a public ip known to the node, i.e. can that
574 node takeover this ip ?
/* Check whether ip->addr appears in the public address list recorded for
   node pnn (ctdb->nodes[pnn]->public_ips), comparing with ctdb_same_ip().
   Callers treat a non-zero result as "this node cannot serve the ip".
   NOTE(review): pnn indexes ctdb->nodes[] directly; no range check is
   visible here. */
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
577 struct ctdb_public_ip_list *ip)
579 struct ctdb_all_public_ips *public_ips;
582 public_ips = ctdb->nodes[pnn]->public_ips;
584 if (public_ips == NULL) {
588 for (i=0;i<public_ips->num;i++) {
589 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590 /* yes, this node can serve this public ip */
599 /* search the node list for a node to take over this ip.
600 pick the node that is currently serving the least number of ips
601 so that the ips get spread out evenly.
/* Look for a node that can take over `ip`. Nodes whose flags intersect
   `mask` and nodes for which can_node_serve_ip() reports failure are
   skipped; for the others the current coverage is obtained with
   node_ip_coverage() over all_ips. A warning is logged when no candidate
   exists. ctdb_takeover_run() treats a non-zero return as failure.
   NOTE(review): the statements that pick the best candidate and record it
   are not visible in this excerpt. */
603 static int find_takeover_node(struct ctdb_context *ctdb,
604 struct ctdb_node_map *nodemap, uint32_t mask,
605 struct ctdb_public_ip_list *ip,
606 struct ctdb_public_ip_list *all_ips)
612 for (i=0;i<nodemap->num;i++) {
613 if (nodemap->nodes[i].flags & mask) {
614 /* This node is not healthy and can not be used to serve
620 /* verify that this node can serve this ip */
621 if (can_node_serve_ip(ctdb, i, ip)) {
622 /* no it couldnt so skip to the next node */
626 num = node_ip_coverage(ctdb, i, all_ips);
627 /* was this the first node we checked ? */
639 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640 ctdb_addr_to_str(&ip->addr)));
/* Build the IP_KEYLEN-word key used for the ip tree from a socket address.
   The key lives in a function-local static array that is cleared and
   refilled on every call, so a previously built key is overwritten by the
   next call. One address family stores the IPv4 address in key[3] only
   (key[0..2] stay zero); the other stores the four 32-bit words of the IPv6
   address in key[0..3]; each word is passed through htonl(). Any other
   family is reported at DEBUG_ERR. */
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
652 static uint32_t key[IP_KEYLEN];
654 bzero(key, sizeof(key));
656 switch (ip->sa.sa_family) {
658 key[3] = htonl(ip->ip.sin_addr.s_addr);
661 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
667 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
674 static void *add_ip_callback(void *parm, void *data)
/* Tree traversal callback used by create_merged_ip_list(): param points at
   the head pointer of the list being built and data is the tree entry being
   visited; the entry is linked in front of the current head.
   NOTE(review): the statement that updates the head pointer is not visible
   in this excerpt. */
679 void getips_count_callback(void *param, void *data)
681 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
684 new_ip->next = *ip_list;
/* Build the cluster-wide list of public addresses.
   Any previous ctdb->ip_tree is freed and a new tree is created. For every
   node that is not flagged NODE_FLAGS_DELETED and has a public_ips list, one
   ctdb_public_ip_list entry per address (pnn and addr copied) is allocated
   on the tree and inserted under the key produced by ip_key(). The tree is
   then traversed with getips_count_callback() to chain the entries into the
   list that is handed back. Because the entries are parented to
   ctdb->ip_tree they are released by the next call. */
688 struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb)
692 struct ctdb_public_ip_list *ip_list;
693 struct ctdb_all_public_ips *public_ips;
695 if (ctdb->ip_tree != NULL) {
696 talloc_free(ctdb->ip_tree);
697 ctdb->ip_tree = NULL;
699 ctdb->ip_tree = trbt_create(ctdb, 0);
701 for (i=0;i<ctdb->num_nodes;i++) {
702 public_ips = ctdb->nodes[i]->public_ips;
704 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
708 /* there were no public ips for this node */
709 if (public_ips == NULL) {
713 for (j=0;j<public_ips->num;j++) {
714 struct ctdb_public_ip_list *tmp_ip;
716 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
717 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
718 tmp_ip->pnn = public_ips->ips[j].pnn;
719 tmp_ip->addr = public_ips->ips[j].addr;
722 trbt_insertarray32_callback(ctdb->ip_tree,
723 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
730 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
736 make any IP alias changes for public addresses that are necessary
/* Recompute which node serves each public address and push the result out.
   Phases, in order:
   - choose the node mask: inactive and disabled nodes are excluded when at
     least one node has neither flag, otherwise only inactive ones
   - build the merged address list with create_merged_ip_list()
   - with the deterministic_public_ips tunable set to 1, reassign every
     address to (position in list) modulo nodemap->num
   - drop assignments to masked nodes and to nodes that cannot serve the
     address, then look for a new node with find_takeover_node()
   - unless suppressed by the no_ip_failback / deterministic_public_ips
     tunables, even out the spread while maxnum > minnum+1
   - send RELEASE_IP (RELEASE_IPv4 for AF_INET addresses) to every connected
     node for each address it should not hold and wait for completion
   - send TAKEOVER_IP (TAKEOVER_IPv4 for AF_INET addresses) to the owner of
     each assigned address and wait for completion
   All temporary allocations hang off tmp_ctx, which is freed on the visible
   error paths and at the end.
   NOTE(review): "i%nodemap->num" divides by zero for an empty node map -
   confirm callers never pass one.
   NOTE(review): tmp_ip->pnn is used as an index into nodemap->nodes[] with
   no range check visible here. */
738 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
740 int i, num_healthy, retries;
741 struct ctdb_public_ip ip;
742 struct ctdb_public_ipv4 ipv4;
744 struct ctdb_public_ip_list *all_ips, *tmp_ip;
745 int maxnode, maxnum=0, minnode, minnum=0, num;
747 struct timeval timeout;
748 struct client_async_data *async_data;
749 struct ctdb_client_control_state *state;
750 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
755 /* Count how many completely healthy nodes we have */
757 for (i=0;i<nodemap->num;i++) {
758 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
763 if (num_healthy > 0) {
764 /* We have healthy nodes, so only consider them for
765 serving public addresses
767 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
769 /* We didnt have any completely healthy nodes so
770 use "disabled" nodes as a fallback
772 mask = NODE_FLAGS_INACTIVE;
775 /* since nodes only know about those public addresses that
776 can be served by that particular node, no single node has
777 a full list of all public addresses that exist in the cluster.
778 Walk over all node structures and create a merged list of
779 all public addresses that exist in the cluster.
781 keep the tree of ips around as ctdb->ip_tree
783 all_ips = create_merged_ip_list(ctdb);
785 /* If we want deterministic ip allocations, i.e. that the ip addresses
786 will always be allocated the same way for a specific set of
787 available/unavailable nodes.
789 if (1 == ctdb->tunable.deterministic_public_ips) {
790 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
791 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
792 tmp_ip->pnn = i%nodemap->num;
797 /* mark all public addresses with a masked node as being served by
800 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
801 if (tmp_ip->pnn == -1) {
804 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
809 /* verify that the assigned nodes can serve that public ip
810 and set it to -1 if not
812 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
813 if (tmp_ip->pnn == -1) {
816 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
817 /* this node can not serve this ip. */
823 /* now we must redistribute all public addresses with takeover node
824 -1 among the nodes available
828 /* loop over all ip's and find a physical node to cover for
831 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
832 if (tmp_ip->pnn == -1) {
833 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
834 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
835 ctdb_addr_to_str(&tmp_ip->addr)));
840 /* If we dont want ips to fail back after a node becomes healthy
841 again, we wont even try to reallocate the ip addresses so that
842 they are evenly spread out.
843 This can NOT be used at the same time as DeterministicIPs !
845 if (1 == ctdb->tunable.no_ip_failback) {
846 if (1 == ctdb->tunable.deterministic_public_ips) {
847 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
853 /* now, try to make sure the ip addresses are evenly distributed
855 for each ip address, loop over all nodes that can serve this
856 ip and make sure that the difference between the node
857 serving the most and the node serving the least ip's are not greater
860 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
861 if (tmp_ip->pnn == -1) {
865 /* Get the highest and lowest number of ips's served by any
866 valid node which can serve this ip.
870 for (i=0;i<nodemap->num;i++) {
871 if (nodemap->nodes[i].flags & mask) {
875 /* only check nodes that can actually serve this ip */
876 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
877 /* no it couldnt so skip to the next node */
881 num = node_ip_coverage(ctdb, i, all_ips);
902 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
903 ctdb_addr_to_str(&tmp_ip->addr)));
908 /* If we want deterministic IPs then dont try to reallocate
909 them to spread out the load.
911 if (1 == ctdb->tunable.deterministic_public_ips) {
915 /* if the spread between the smallest and largest coverage by
916 a node is >=2 we steal one of the ips from the node with
917 most coverage to even things out a bit.
918 try to do this at most 5 times since we dont want to spend
919 too much time balancing the ip coverage.
921 if ( (maxnum > minnum+1)
923 struct ctdb_public_ip_list *tmp;
925 /* mark one of maxnode's vnn's as unassigned and try
928 for (tmp=all_ips;tmp;tmp=tmp->next) {
929 if (tmp->pnn == maxnode) {
939 /* finished distributing the public addresses, now just send the
940 info out to the nodes
944 /* at this point ->pnn is the node which will own each IP
945 or -1 if there is no node that can cover this ip
948 /* now tell all nodes to delete any alias that they should not
949 have. This will be a NOOP on nodes that don't currently
950 hold the given alias */
951 async_data = talloc_zero(tmp_ctx, struct client_async_data);
952 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
954 for (i=0;i<nodemap->num;i++) {
955 /* don't talk to unconnected nodes, but do talk to banned nodes */
956 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
960 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
961 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
962 /* This node should be serving this
963 vnn so dont tell it to release the ip
967 if (tmp_ip->addr.sa.sa_family == AF_INET) {
968 ipv4.pnn = tmp_ip->pnn;
969 ipv4.sin = tmp_ip->addr.ip;
971 timeout = TAKEOVER_TIMEOUT();
972 data.dsize = sizeof(ipv4);
973 data.dptr = (uint8_t *)&ipv4;
974 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
975 0, CTDB_CONTROL_RELEASE_IPv4, 0,
979 ip.pnn = tmp_ip->pnn;
980 ip.addr = tmp_ip->addr;
982 timeout = TAKEOVER_TIMEOUT();
983 data.dsize = sizeof(ip);
984 data.dptr = (uint8_t *)&ip;
985 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
986 0, CTDB_CONTROL_RELEASE_IP, 0,
992 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
993 talloc_free(tmp_ctx);
997 ctdb_client_async_add(async_data, state);
1000 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1001 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1002 talloc_free(tmp_ctx);
1005 talloc_free(async_data);
1008 /* tell all nodes to get their own IPs */
1009 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1010 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1011 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1012 if (tmp_ip->pnn == -1) {
1013 /* this IP won't be taken over */
1017 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1018 ipv4.pnn = tmp_ip->pnn;
1019 ipv4.sin = tmp_ip->addr.ip;
1021 timeout = TAKEOVER_TIMEOUT();
1022 data.dsize = sizeof(ipv4);
1023 data.dptr = (uint8_t *)&ipv4;
1024 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1025 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1029 ip.pnn = tmp_ip->pnn;
1030 ip.addr = tmp_ip->addr;
1032 timeout = TAKEOVER_TIMEOUT();
1033 data.dsize = sizeof(ip);
1034 data.dptr = (uint8_t *)&ip;
1035 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1036 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1040 if (state == NULL) {
1041 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1042 talloc_free(tmp_ctx);
1046 ctdb_client_async_add(async_data, state);
1048 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1049 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1050 talloc_free(tmp_ctx);
1054 talloc_free(tmp_ctx);
1060 destroy a ctdb_client_ip structure
/* talloc destructor installed by ctdb_control_tcp_client(): logs the entry
   being destroyed and unlinks it from ctdb->client_ip_list so the list never
   holds a freed element. */
1062 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1064 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1065 ctdb_addr_to_str(&ip->addr),
1066 ntohs(ip->addr.ip.sin_port),
1069 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1074 called by a client to inform us of a TCP connection that it is managing
1075 that should be tickled with an ACK when IP takeover is done
1076 we handle both the old ipv4 style of packets as well as the new ipv4/6
/* Control handler: a local client registers a TCP connection it is managing.
   The payload is accepted in two sizes, the legacy struct ctdb_control_tcp
   (converted into a struct ctdb_control_tcp_addr) and the newer
   struct ctdb_control_tcp_addr itself; any other size is rejected with an
   error message. Both endpoints are canonicalised, the destination is looked
   up as a public address and must be held by this node (vnn->pnn ==
   ctdb->pnn). On success a ctdb_client_ip entry and a ctdb_tcp_list entry
   are created as talloc children of the client, and the connection is
   broadcast to all connected nodes with CTDB_CONTROL_TCP_ADD (no reply
   requested).
   NOTE(review): `client` comes from ctdb_reqid_find() and is dereferenced
   below (client->pid, client->tcp_list) with no NULL check visible in this
   excerpt - confirm an unknown client_id cannot reach this point. */
1079 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1082 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1083 struct ctdb_control_tcp *old_addr = NULL;
1084 struct ctdb_control_tcp_addr new_addr;
1085 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1086 struct ctdb_tcp_list *tcp;
1087 struct ctdb_control_tcp_vnn t;
1090 struct ctdb_client_ip *ip;
1091 struct ctdb_vnn *vnn;
1092 ctdb_sock_addr addr;
/* the payload size alone selects the wire format */
1094 switch (indata.dsize) {
1095 case sizeof(struct ctdb_control_tcp):
1096 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1097 ZERO_STRUCT(new_addr);
1098 tcp_sock = &new_addr;
1099 tcp_sock->src.ip = old_addr->src;
1100 tcp_sock->dest.ip = old_addr->dest;
1102 case sizeof(struct ctdb_control_tcp_addr):
1103 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1106 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1107 "to ctdb_control_tcp_client. size was %d but "
1108 "only allowed sizes are %lu and %lu\n",
1110 (long unsigned)sizeof(struct ctdb_control_tcp),
1111 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
/* canonicalise both endpoints in place, going through a temporary copy */
1115 addr = tcp_sock->src;
1116 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1117 addr = tcp_sock->dest;
1118 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
/* the destination must be one of our public addresses */
1121 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1122 vnn = find_public_ip_vnn(ctdb, &addr);
1124 switch (addr.sa.sa_family) {
1126 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1127 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1128 ctdb_addr_to_str(&addr)));
1132 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1133 ctdb_addr_to_str(&addr)));
1136 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1142 if (vnn->pnn != ctdb->pnn) {
1143 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1144 ctdb_addr_to_str(&addr),
1145 client_id, client->pid));
1146 /* failing this call will tell smbd to die */
/* both records are talloc children of the client and go away with it */
1150 ip = talloc(client, struct ctdb_client_ip);
1151 CTDB_NO_MEMORY(ctdb, ip);
1155 ip->client_id = client_id;
1156 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1157 DLIST_ADD(ctdb->client_ip_list, ip);
1159 tcp = talloc(client, struct ctdb_tcp_list);
1160 CTDB_NO_MEMORY(ctdb, tcp);
1162 tcp->connection.src_addr = tcp_sock->src;
1163 tcp->connection.dst_addr = tcp_sock->dest;
1165 DLIST_ADD(client->tcp_list, tcp);
/* payload of the CTDB_CONTROL_TCP_ADD broadcast sent below */
1167 t.src = tcp_sock->src;
1168 t.dest = tcp_sock->dest;
1170 data.dptr = (uint8_t *)&t;
1171 data.dsize = sizeof(t);
1173 switch (addr.sa.sa_family) {
1175 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1176 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1177 ctdb_addr_to_str(&tcp_sock->src),
1178 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1181 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1182 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1183 ctdb_addr_to_str(&tcp_sock->src),
1184 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1187 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1191 /* tell all nodes about this tcp connection */
1192 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1193 CTDB_CONTROL_TCP_ADD,
1194 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1196 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1204 find a tcp address on a list
/* Linear search of `array` for a connection whose source and destination
   both match `tcp` according to ctdb_same_sockaddr(). A pointer into the
   array is returned for a match, so the caller may modify the entry in
   place. A NULL array is handled up front; callers compare the result
   against NULL to detect "not found". */
1206 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1207 struct ctdb_tcp_connection *tcp)
1211 if (array == NULL) {
1215 for (i=0;i<array->num;i++) {
1216 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1217 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1218 return &array->connections[i];
1225 called by a daemon to inform us of a TCP connection that one of its
1226 clients is managing and that should be tickled with an ACK when IP takeover is
/* Control handler: record a TCP connection (struct ctdb_control_tcp_vnn in
   indata) in the tickle array of the public address it is connected to.
   The destination is looked up as a public address; an unknown one is only
   logged. If the vnn has no array yet, one is allocated and the connection
   becomes its first entry. Otherwise a connection that is already known is
   left alone and a new one is appended after growing the array.
   NOTE(review): where tcparray->num is initialised for a new array is not
   visible in this excerpt, although it is used as an index right after.
   NOTE(review): the talloc_realloc result is stored straight into
   tcparray->connections, so on allocation failure the old pointer is
   overwritten while ->num still counts its entries - confirm. */
1229 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1231 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1232 struct ctdb_tcp_array *tcparray;
1233 struct ctdb_tcp_connection tcp;
1234 struct ctdb_vnn *vnn;
1236 vnn = find_public_ip_vnn(ctdb, &p->dest);
1238 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1239 ctdb_addr_to_str(&p->dest)));
1245 tcparray = vnn->tcp_array;
1247 /* If this is the first tickle */
1248 if (tcparray == NULL) {
1249 tcparray = talloc_size(ctdb->nodes,
1250 offsetof(struct ctdb_tcp_array, connections) +
1251 sizeof(struct ctdb_tcp_connection) * 1);
1252 CTDB_NO_MEMORY(ctdb, tcparray);
1253 vnn->tcp_array = tcparray;
1256 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1257 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1259 tcparray->connections[tcparray->num].src_addr = p->src;
1260 tcparray->connections[tcparray->num].dst_addr = p->dest;
1266 /* Do we already have this tickle ?*/
1267 tcp.src_addr = p->src;
1268 tcp.dst_addr = p->dest;
1269 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1270 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1271 ctdb_addr_to_str(&tcp.dst_addr),
1272 ntohs(tcp.dst_addr.ip.sin_port),
1277 /* A new tickle, we must add it to the array */
1278 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1279 struct ctdb_tcp_connection,
1281 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1283 vnn->tcp_array = tcparray;
1284 tcparray->connections[tcparray->num].src_addr = p->src;
1285 tcparray->connections[tcparray->num].dst_addr = p->dest;
1288 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1289 ctdb_addr_to_str(&tcp.dst_addr),
1290 ntohs(tcp.dst_addr.ip.sin_port),
1298 called to forget a TCP connection: removes it from the tickle array of the
1299 public address it was registered for, e.g. when the owning client goes away
/* Remove `conn` from the tickle array of the public address it is connected
   to (looked up from conn->dst_addr). An unknown public address is logged at
   DEBUG_ERR; an empty array or an unknown connection is logged at DEBUG_INFO.
   A known connection is removed by overwriting its slot with the last entry
   and decrementing ->num, so the order of the remaining entries is not
   preserved. The array is freed once it becomes empty, and
   vnn->tcp_update_needed is set. Note that the final log message prints the
   source address while the earlier ones print the destination. */
1302 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1304 struct ctdb_tcp_connection *tcpp;
1305 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1308 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1309 ctdb_addr_to_str(&conn->dst_addr)));
1313 /* if the array is empty we cant remove it
1314 and we dont need to do anything
1316 if (vnn->tcp_array == NULL) {
1317 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1318 ctdb_addr_to_str(&conn->dst_addr),
1319 ntohs(conn->dst_addr.ip.sin_port)));
1324 /* See if we know this connection
1325 if we dont know this connection then we dont need to do anything
1327 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1329 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1330 ctdb_addr_to_str(&conn->dst_addr),
1331 ntohs(conn->dst_addr.ip.sin_port)));
1336 /* We need to remove this entry from the array.
1337 Instead of allocating a new array and copying data to it
1338 we cheat and just copy the last entry in the existing array
1339 to the entry that is to be removed and just shrink the
1342 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1343 vnn->tcp_array->num--;
1345 /* If we deleted the last entry we also need to remove the entire array
1347 if (vnn->tcp_array->num == 0) {
1348 talloc_free(vnn->tcp_array);
1349 vnn->tcp_array = NULL;
1352 vnn->tcp_update_needed = true;
1354 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1355 ctdb_addr_to_str(&conn->src_addr),
1356 ntohs(conn->src_addr.ip.sin_port)));
1361 called when a daemon restarts - send all tickles for all public addresses
1362 we are serving immediately to the new node.
/* Control handler for a daemon (re)start on node `vnn`. The only visible
   content is a placeholder: sending the tickles this node is serving to the
   new node is still marked as to-do below. */
1364 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1366 /*XXX here we should send all tickes we are serving to the new node */
1372 called when a client structure goes away - hook to remove
1373 elements from the tcp_list in all daemons
/* Drain client->tcp_list: each entry is unlinked from the list and its
   connection is removed from the tickle array of the matching public
   address via ctdb_remove_tcp_connection(). The loop always takes the
   current list head, so it ends when the list is empty. */
1375 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1377 while (client->tcp_list) {
1378 struct ctdb_tcp_list *tcp = client->tcp_list;
1379 DLIST_REMOVE(client->tcp_list, tcp);
1380 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1386 release all IPs on shutdown
/* Walk every public address in ctdb->vnn; for the ones handled here the
   releaseip event script is run via ctdb_event_script_args() and the clients
   registered for the address are killed with release_kill_clients().
   Addresses the kernel does not hold are tested for first (the action taken
   for them is not visible in this excerpt).
   NOTE(review): the address string is talloc_strdup'ed onto ctdb and no free
   is visible - presumably acceptable because this runs at shutdown. */
1388 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1390 struct ctdb_vnn *vnn;
1392 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1393 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1396 if (vnn->pnn == ctdb->pnn) {
1399 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1401 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1402 vnn->public_netmask_bits);
1403 release_kill_clients(ctdb, &vnn->public_address);
1409 get list of public IPs
/*
  Builds a struct ctdb_all_public_ips reply in outdata.

  ctdb    - daemon context whose vnn list is reported
  c       - the request; not referenced in the lines visible here
  outdata - receives the reply: dptr points at a zeroed buffer that is
            a talloc child of outdata, dsize is its length

  The vnn list is walked twice: once to size the buffer and once to
  copy pnn and public_address of every vnn into ips->ips[].
  NOTE(review): the statements that maintain num and i are not visible
  here.
 */
1411 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1412 struct ctdb_req_control *c, TDB_DATA *outdata)
1415 struct ctdb_all_public_ips *ips;
1416 struct ctdb_vnn *vnn;
1418 /* count how many public ip structures we have */
1420 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
/* fixed header up to ips[] plus one ctdb_public_ip per counted address */
1424 len = offsetof(struct ctdb_all_public_ips, ips) +
1425 num*sizeof(struct ctdb_public_ip);
1426 ips = talloc_zero_size(outdata, len);
1427 CTDB_NO_MEMORY(ctdb, ips);
1429 outdata->dsize = len;
1430 outdata->dptr = (uint8_t *)ips;
1434 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1435 ips->ips[i].pnn = vnn->pnn;
1436 ips->ips[i].addr = vnn->public_address;
1445 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Builds a struct ctdb_all_public_ipsv4 reply in outdata.

  ctdb    - daemon context whose vnn list is reported
  c       - the request; not referenced in the lines visible here
  outdata - receives the reply: dptr points at a zeroed buffer that is
            a talloc child of outdata, dsize is its length

  Both passes over the vnn list test the address family against
  AF_INET, so the counting pass and the filling pass apply the same
  filter.  Each reported entry carries the vnn's pnn and the ipv4 view
  (public_address.ip) of its address.
  NOTE(review): the bodies of the family checks and the statements
  that maintain num and i are not visible here.
 */
1447 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1448 struct ctdb_req_control *c, TDB_DATA *outdata)
1451 struct ctdb_all_public_ipsv4 *ips;
1452 struct ctdb_vnn *vnn;
1454 /* count how many public ip structures we have */
1456 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1457 if (vnn->public_address.sa.sa_family != AF_INET) {
/* fixed header up to ips[] plus one ctdb_public_ipv4 per counted address */
1463 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1464 num*sizeof(struct ctdb_public_ipv4);
1465 ips = talloc_zero_size(outdata, len);
1466 CTDB_NO_MEMORY(ctdb, ips);
1468 outdata->dsize = len;
1469 outdata->dptr = (uint8_t *)ips;
1473 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1474 if (vnn->public_address.sa.sa_family != AF_INET) {
1477 ips->ips[i].pnn = vnn->pnn;
1478 ips->ips[i].sin = vnn->public_address.ip;
1487 structure containing the listening socket and the list of tcp connections
1488 that the ctdb daemon is to kill
/*
  State for killing tcp connections on one vnn.  It is created lazily
  in ctdb_killtcp_add_connection() and reachable through vnn->killtcp.
 */
1490 struct ctdb_kill_tcp {
/* vnn whose ->killtcp pointer is cleared by ctdb_killtcp_destructor() */
1491 struct ctdb_vnn *vnn;
/* daemon context; its event context is used to schedule the tickle timer */
1492 struct ctdb_context *ctdb;
/* fd event that dispatches capture socket reads to capture_tcp_handler() */
1494 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con entries, keyed by killtcp_key() */
1495 trbt_tree_t *connections;
1500 a tcp connection that is to be killed
/*
  One tcp connection queued for killing; instances are stored in
  ctdb_kill_tcp->connections.
 */
1502 struct ctdb_killtcp_con {
/* canonicalized endpoints, filled in by ctdb_killtcp_add_connection() */
1503 ctdb_sock_addr src_addr;
1504 ctdb_sock_addr dst_addr;
/* owning ctdb_kill_tcp, which is also the talloc parent of this entry */
1506 struct ctdb_kill_tcp *killtcp;
1509 /* this function is used to create a key to represent this socketpair
1510 in the killtcp tree.
1511 this key is used to insert and lookup matching socketpairs that are
1512 to be tickled and RST
/*
  KILLTCP_KEYLEN is the number of 32-bit words in a key.

  killtcp_key() fills a function-static key[] array from the two
  addresses:
    4-word form:  key[0..1] dst/src address, key[2..3] dst/src port
                  (ip.* fields); the remaining words stay zero after
                  the bzero()
    10-word form: key[0..7] dst/src address words, interleaved from
                  s6_addr32[3] down to s6_addr32[0], key[8..9] dst/src
                  port (ip6.* fields)
  Ports are stored exactly as found in the sockaddr, without byte
  swapping.  An error is logged when the two families differ or when
  the family is not handled by the switch.

  NOTE(review): key[] is static, so every call overwrites the previous
  key and the result has to be consumed before the next call.  The
  case labels and return statements are not visible here; presumably
  key is what gets returned - confirm.
 */
1514 #define KILLTCP_KEYLEN 10
1515 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1517 static uint32_t key[KILLTCP_KEYLEN];
1519 bzero(key, sizeof(key));
1521 if (src->sa.sa_family != dst->sa.sa_family) {
1522 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1526 switch (src->sa.sa_family) {
1528 key[0] = dst->ip.sin_addr.s_addr;
1529 key[1] = src->ip.sin_addr.s_addr;
1530 key[2] = dst->ip.sin_port;
1531 key[3] = src->ip.sin_port;
1534 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1535 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1536 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1537 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1538 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1539 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1540 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1541 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1542 key[8] = dst->ip6.sin6_port;
1543 key[9] = src->ip6.sin6_port;
1546 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1554 called when we get a read event on the raw socket
/*
  fd event handler for the capture socket; private_data is the
  struct ctdb_kill_tcp the event was registered with.

  Events without EVENT_FD_READ are tested for first.  One packet is
  read with ctdb_sys_read_tcp_packet(), which also yields ack_seq and
  seq, and the captured address pair is looked up in
  killtcp->connections.  For a match the handler logs
  "sending a tcp reset" and calls ctdb_sys_send_tcp() from dst_addr to
  src_addr with the captured ack_seq/seq and a final argument of 1.

  NOTE(review): the lookup key is built as killtcp_key(&src, &dst)
  while ctdb_killtcp_add_connection() inserts with
  killtcp_key(&con->dst_addr, &con->src_addr); presumably the captured
  packet travels in the opposite direction to the stored connection -
  confirm.
 */
1556 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1557 uint16_t flags, void *private_data)
1559 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1560 struct ctdb_killtcp_con *con;
1561 ctdb_sock_addr src, dst;
1562 uint32_t ack_seq, seq;
1564 if (!(flags & EVENT_FD_READ)) {
1568 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1569 killtcp->private_data,
1571 &ack_seq, &seq) != 0) {
1572 /* probably a non-tcp ACK packet */
1576 /* check if we have this guy in our list of connections
1579 con = trbt_lookuparray32(killtcp->connections,
1580 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1582 /* no this was some other packet we can just ignore */
1586 /* This one has been tickled !
1587 now reset him and remove him from the list.
1589 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1590 ntohs(con->dst_addr.ip.sin_port),
1591 ctdb_addr_to_str(&con->src_addr),
1592 ntohs(con->src_addr.ip.sin_port)));
1594 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1599 /* when traversing the list of all tcp connections to send tickle acks to
1600 (so that we can capture the ack coming back and kill the connection
1602 this callback is called for each connection we are currently trying to kill
/*
  Tree traverse callback used by ctdb_tickle_sentenced_connections().

  param - caller supplied pointer; the visible caller passes NULL
  data  - the tree entry, a struct ctdb_killtcp_con

  An entry whose count has reached 5 is given up on; any other entry
  is tickled again using its dst_addr and src_addr.
  NOTE(review): the body of the give-up branch and the call that
  receives the two address arguments are not visible here.
 */
1604 static void tickle_connection_traverse(void *param, void *data)
1606 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1608 /* have tried too many times, just give up */
1609 if (con->count >= 5) {
1614 /* otherwise, try tickling it again */
1617 (ctdb_sock_addr *)&con->dst_addr,
1618 (ctdb_sock_addr *)&con->src_addr,
1624 called every second until all sentenced connections have been reset
/*
  Timed event handler; private_data is the struct ctdb_kill_tcp.

  Runs tickle_connection_traverse() over killtcp->connections.  When
  the tree pointer or its root is NULL the whole killtcp structure is
  freed; otherwise the handler re-arms itself one second ahead on
  killtcp->ctdb->ev, passing killtcp as the second argument of
  event_add_timed() (presumably the talloc parent of the event -
  confirm).
  NOTE(review): whether the function returns right after the
  talloc_free() is not visible here.
 */
1626 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1627 struct timeval t, void *private_data)
1629 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1632 /* loop over all connections sending tickle ACKs */
1633 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1636 /* If there are no more connections to kill we can remove the
1637 entire killtcp structure
1639 if ( (killtcp->connections == NULL) ||
1640 (killtcp->connections->root == NULL) ) {
1641 talloc_free(killtcp);
1645 /* try tickling them again in a seconds time
1647 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1648 ctdb_tickle_sentenced_connections, killtcp);
1652 destroy the killtcp structure
/*
  talloc destructor installed on the killtcp structure by
  ctdb_killtcp_add_connection().  Clears the back pointer held in
  killtcp->vnn so the vnn no longer refers to the structure being
  destroyed.
 */
1654 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1656 killtcp->vnn->killtcp = NULL;
1661 /* nothing fancy here, just unconditionally replace any existing
1662 connection structure with the new one.
1664 dont even free the old one if it did exist, that one is talloc_stolen
1665 by the same node in the tree anyway and will be deleted when the new data
/*
  Callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which passes the new connection
  structure as the callback parameter.
  NOTE(review): the body is not visible here; presumably parm is that
  new structure and data is whatever the tree node held before -
  confirm.
 */
1668 static void *add_killtcp_callback(void *parm, void *data)
1674 add a tcp socket to the list of connections we want to RST
/*
  Queues one tcp connection for killing.

  The two endpoints (s and d in the calls below) are canonicalized
  into src and dst.  A vnn is searched with find_public_ip_vnn() for
  dst and for src, and ctdb->single_ip_vnn is used when its
  public_address equals dst; an error is logged when the address is
  not a public one.

  vnn->killtcp is allocated on first use as a talloc child of ctdb
  with capture_fd set to -1, its own connections tree and
  ctdb_killtcp_destructor() as destructor.  The new connection
  structure is a talloc child of killtcp and is inserted into the tree
  under killtcp_key(&con->dst_addr, &con->src_addr) through
  add_killtcp_callback().

  While capture_fd is still -1 a capture socket is opened on
  vnn->iface.  While fde is still NULL an fd event with
  EVENT_FD_READ | EVENT_FD_AUTOCLOSE is registered for
  capture_tcp_handler() and a timer is started that fires
  ctdb_tickle_sentenced_connections() one second later.

  NOTE(review): several conditions and return statements are not
  visible here; the closing talloc_free(vnn->killtcp) presumably
  belongs to a failure path - confirm.
 */
1676 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1680 ctdb_sock_addr src, dst;
1681 struct ctdb_kill_tcp *killtcp;
1682 struct ctdb_killtcp_con *con;
1683 struct ctdb_vnn *vnn;
1685 ctdb_canonicalize_ip(s, &src);
1686 ctdb_canonicalize_ip(d, &dst);
1688 vnn = find_public_ip_vnn(ctdb, &dst);
1690 vnn = find_public_ip_vnn(ctdb, &src);
1693 /* if it is not a public ip it could be our 'single ip' */
1694 if (ctdb->single_ip_vnn) {
1695 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1696 vnn = ctdb->single_ip_vnn;
1701 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1705 killtcp = vnn->killtcp;
1707 /* If this is the first connection to kill we must allocate
1710 if (killtcp == NULL) {
1711 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1712 CTDB_NO_MEMORY(ctdb, killtcp);
1715 killtcp->ctdb = ctdb;
1716 killtcp->capture_fd = -1;
1717 killtcp->connections = trbt_create(killtcp, 0);
1719 vnn->killtcp = killtcp;
1720 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1725 /* create a structure that describes this connection we want to
1726 RST and store it in killtcp->connections
1728 con = talloc(killtcp, struct ctdb_killtcp_con);
1729 CTDB_NO_MEMORY(ctdb, con);
1730 con->src_addr = src;
1731 con->dst_addr = dst;
1733 con->killtcp = killtcp;
1736 trbt_insertarray32_callback(killtcp->connections,
1737 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1738 add_killtcp_callback, con);
1741 If we dont have a socket to listen on yet we must create it
1743 if (killtcp->capture_fd == -1) {
1744 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1745 if (killtcp->capture_fd == -1) {
1746 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1752 if (killtcp->fde == NULL) {
1753 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1754 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1755 capture_tcp_handler, killtcp);
1757 /* We also need to set up some events to tickle all these connections
1758 until they are all reset
1760 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1761 ctdb_tickle_sentenced_connections, killtcp);
1764 /* tickle him once now */
1773 talloc_free(vnn->killtcp);
1774 vnn->killtcp = NULL;
1779 kill a TCP connection.
/*
  Control entry point: interprets indata.dptr as a
  struct ctdb_control_killtcp and forwards its src_addr and dst_addr
  to ctdb_killtcp_add_connection(), whose result is returned.
  NOTE(review): no check of indata.dsize against the structure size is
  visible before the cast - confirm the blob is validated elsewhere.
 */
1781 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1783 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1785 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1789 called by a daemon to inform us of the entire list of TCP tickles for
1790 a particular public address.
1791 this control should only be sent by the node that is currently serving
1792 that public address.
/*
  Replaces the tickle list stored for one public address with the list
  carried in indata (a struct ctdb_control_tcp_tickle_list).

  The blob is first required to reach tickles.connections and then to
  be large enough for tickles.num connection entries.  list->addr has
  to resolve to a vnn through find_public_ip_vnn(); otherwise a
  message is logged at DEBUG_INFO level.  The previous vnn->tcp_array
  is freed and a copy of the received connections becomes the new one.

  NOTE(review): tickles.num comes from the received blob and the
  product sizeof(struct ctdb_tcp_connection) * num is not visibly
  guarded against wrap-around - confirm it cannot overflow.
  NOTE(review): the old list is freed before the new one is allocated,
  so an allocation failure presumably leaves the vnn without any
  tickle list - confirm this is acceptable.
 */
1794 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1796 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1797 struct ctdb_tcp_array *tcparray;
1798 struct ctdb_vnn *vnn;
1800 /* We must at least have tickles.num or else we cant verify the size
1801 of the received data blob
1803 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1804 tickles.connections)) {
1805 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1809 /* verify that the size of data matches what we expect */
1810 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1811 tickles.connections)
1812 + sizeof(struct ctdb_tcp_connection)
1813 * list->tickles.num) {
1814 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1818 vnn = find_public_ip_vnn(ctdb, &list->addr);
1820 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1821 ctdb_addr_to_str(&list->addr)));
1826 /* remove any old ticklelist we might have */
1827 talloc_free(vnn->tcp_array);
1828 vnn->tcp_array = NULL;
/* allocated under ctdb->nodes first, re-parented to vnn by the talloc_steal() below */
1830 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1831 CTDB_NO_MEMORY(ctdb, tcparray);
1833 tcparray->num = list->tickles.num;
1835 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1836 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1838 memcpy(tcparray->connections, &list->tickles.connections[0],
1839 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1841 /* We now have a new fresh tickle list array for this vnn */
1842 vnn->tcp_array = talloc_steal(vnn, tcparray);
1848 called to return the full list of tickles for the public address associated
1849 with the provided vnn
/*
  Returns the tickle list of one public address.

  indata  - its dptr is interpreted as a ctdb_sock_addr naming the
            public address
  outdata - receives a struct ctdb_control_tcp_tickle_list allocated
            as a talloc child of outdata; dsize covers the header up
            to tickles.connections plus num connection entries

  An error is logged when the address does not resolve to a vnn.  The
  connections of vnn->tcp_array are copied into the reply and
  tickles.num is set to the number of entries.
  NOTE(review): no check of indata.dsize is visible before addr is
  used, and the handling of a NULL tcp_array is not visible here -
  confirm both.
 */
1851 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1853 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1854 struct ctdb_control_tcp_tickle_list *list;
1855 struct ctdb_tcp_array *tcparray;
1857 struct ctdb_vnn *vnn;
1859 vnn = find_public_ip_vnn(ctdb, addr);
1861 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1862 ctdb_addr_to_str(addr)));
1867 tcparray = vnn->tcp_array;
1869 num = tcparray->num;
1874 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1875 tickles.connections)
1876 + sizeof(struct ctdb_tcp_connection) * num;
1878 outdata->dptr = talloc_size(outdata, outdata->dsize);
1879 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1880 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1883 list->tickles.num = num;
1885 memcpy(&list->tickles.connections[0], tcparray->connections,
1886 sizeof(struct ctdb_tcp_connection) * num);
1894 set the list of all tcp tickles for a public address
/*
  Packs tcparray into a struct ctdb_control_tcp_tickle_list and sends
  it as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control with
  CTDB_CTRL_FLAG_NOREPLY.

  The message buffer is allocated as a talloc child of ctdb, sized for
  the header up to tickles.connections plus num entries, and is freed
  again once the send has been attempted.  A failed send is logged.

  NOTE(review): the send is always addressed to
  CTDB_BROADCAST_CONNECTED; the destnode and timeout parameters are
  not referenced in the lines visible here - confirm that is intended.
 */
1896 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1897 struct timeval timeout, uint32_t destnode,
1898 ctdb_sock_addr *addr,
1899 struct ctdb_tcp_array *tcparray)
1903 struct ctdb_control_tcp_tickle_list *list;
1906 num = tcparray->num;
1911 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1912 tickles.connections) +
1913 sizeof(struct ctdb_tcp_connection) * num;
1914 data.dptr = talloc_size(ctdb, data.dsize);
1915 CTDB_NO_MEMORY(ctdb, data.dptr);
1917 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1919 list->tickles.num = num;
1921 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1924 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1925 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1926 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1928 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1932 talloc_free(data.dptr);
1939 perform tickle updates if required
/*
  Periodic timed event handler; private_data is the ctdb context.

  For every vnn the handler compares ctdb->pnn with vnn->pnn and tests
  vnn->tcp_update_needed before calling ctdb_ctrl_set_tcp_tickles()
  with CTDB_BROADCAST_CONNECTED and the vnn's public address; a failed
  update is logged.  Finally the handler re-arms itself on
  ctdb->tickle_update_context after
  ctdb->tunable.tickle_update_interval seconds.

  NOTE(review): the bodies of the two filter checks and any reset of
  tcp_update_needed are not visible here.
 */
1941 static void ctdb_update_tcp_tickles(struct event_context *ev,
1942 struct timed_event *te,
1943 struct timeval t, void *private_data)
1945 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1947 struct ctdb_vnn *vnn;
1949 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1950 /* we only send out updates for public addresses that
1953 if (ctdb->pnn != vnn->pnn) {
1956 /* We only send out the updates if we need to */
1957 if (!vnn->tcp_update_needed) {
1960 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1962 CTDB_BROADCAST_CONNECTED,
1963 &vnn->public_address,
1966 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1967 ctdb_addr_to_str(&vnn->public_address)));
1971 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1972 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1973 ctdb_update_tcp_tickles, ctdb);
1978 start periodic update of tcp tickles
/*
  Creates ctdb->tickle_update_context as a new talloc context under
  ctdb and schedules the first ctdb_update_tcp_tickles() run
  ctdb->tunable.tickle_update_interval seconds from now.  The same
  context is passed as the second argument of event_add_timed() on
  every re-arm.
 */
1980 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1982 ctdb->tickle_update_context = talloc_new(ctdb);
1984 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1985 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1986 ctdb_update_tcp_tickles, ctdb);
/*
  State for a gratuitous arp sequence started by
  ctdb_control_send_gratious_arp() and driven by send_gratious_arp().
 */
1992 struct control_gratious_arp {
/* daemon context; its event context is used to schedule the resends */
1993 struct ctdb_context *ctdb;
/* address handed to ctdb_sys_send_arp() */
1994 ctdb_sock_addr addr;
2000 send a control_gratuitous arp
/*
  Timed event handler; private_data is a struct control_gratious_arp.

  Sends one arp for arp->addr on arp->iface through
  ctdb_sys_send_arp() and logs a failure together with
  strerror(errno).  arp->count is compared with CTDB_ARP_REPEAT, and
  the handler re-arms itself CTDB_ARP_INTERVAL seconds ahead with arp
  as the second argument of event_add_timed().

  NOTE(review): the update of arp->count and the body of the
  CTDB_ARP_REPEAT branch are not visible here; presumably the sequence
  stops once the repeat count is reached - confirm.
 */
2002 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2003 struct timeval t, void *private_data)
2006 struct control_gratious_arp *arp = talloc_get_type(private_data,
2007 struct control_gratious_arp);
2009 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2011 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2016 if (arp->count == CTDB_ARP_REPEAT) {
2021 event_add_timed(arp->ctdb->ev, arp,
2022 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2023 send_gratious_arp, arp);
/*
  Control entry point that starts a gratuitous arp sequence.

  indata is interpreted as a struct ctdb_control_gratious_arp.  Its
  size is checked against the fixed part of the structure (everything
  before iface) and against that size plus gratious_arp->len.  A
  struct control_gratious_arp is then allocated under ctdb, filled
  with the address and a copy of the interface name, and
  send_gratious_arp() is scheduled with timeval_zero().

  NOTE(review): talloc_strdup() reads iface up to its terminating NUL;
  no check that the name is terminated within gratious_arp->len is
  visible - confirm.
  NOTE(review): if the talloc_strdup() fails, no free of arp is
  visible before CTDB_NO_MEMORY - check for a leak on that path.
 */
2030 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2032 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2033 struct control_gratious_arp *arp;
2035 /* verify the size of indata */
2036 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2037 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2038 (unsigned)indata.dsize,
2039 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2043 ( offsetof(struct ctdb_control_gratious_arp, iface)
2044 + gratious_arp->len ) ){
2046 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2047 "but should be %u bytes\n",
2048 (unsigned)indata.dsize,
2049 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2054 arp = talloc(ctdb, struct control_gratious_arp);
2055 CTDB_NO_MEMORY(ctdb, arp);
2058 arp->addr = gratious_arp->addr;
2059 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2060 CTDB_NO_MEMORY(ctdb, arp->iface);
2063 event_add_timed(arp->ctdb->ev, arp,
2064 timeval_zero(), send_gratious_arp, arp);
/*
  Control entry point that adds a public address.

  indata is interpreted as a struct ctdb_control_ip_iface.  Its size
  is checked against the fixed part of the structure (everything
  before iface) and against that size plus pub->len.  The address,
  mask and interface name are then handed to
  ctdb_add_public_address(), and a failure is logged.
  NOTE(review): the return statements are not visible here.
 */
2069 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2071 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2074 /* verify the size of indata */
2075 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2076 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2080 ( offsetof(struct ctdb_control_ip_iface, iface)
2083 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2084 "but should be %u bytes\n",
2085 (unsigned)indata.dsize,
2086 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2090 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2093 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2101 called when releaseip event finishes for del_public_address
/*
  Completion callback registered by ctdb_control_del_public_address().
  It frees private_data, which that caller sets to the temporary
  talloc context created for the release event.
 */
2103 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2106 talloc_free(private_data);
/*
  Control entry point that removes a public address.

  indata is interpreted as a struct ctdb_control_ip_iface and size
  checked in the same way as in ctdb_control_add_public_address().
  The vnn list is searched with ctdb_same_ip(); the matching vnn is
  unlinked from ctdb->vnn and a CTDB_EVENT_RELEASE_IP event script is
  started through ctdb_event_script_callback().  A temporary talloc
  context created under ctdb is passed both as the memory context of
  that call and as the private data of delete_ip_callback(), which
  frees it; the address string given to the script is allocated on
  the same context.

  NOTE(review): error handling after ctdb_event_script_callback() and
  the release of the unlinked vnn itself are not visible here.
 */
2109 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2111 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2112 struct ctdb_vnn *vnn;
2115 /* verify the size of indata */
2116 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2117 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2121 ( offsetof(struct ctdb_control_ip_iface, iface)
2124 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2125 "but should be %u bytes\n",
2126 (unsigned)indata.dsize,
2127 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2131 /* walk over all public addresses until we find a match */
2132 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2133 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2134 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2136 DLIST_REMOVE(ctdb->vnn, vnn);
2138 ret = ctdb_event_script_callback(ctdb,
2139 mem_ctx, delete_ip_callback, mem_ctx,
2141 CTDB_EVENT_RELEASE_IP,
2144 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2145 vnn->public_netmask_bits);
2157 /* This function is called from the recovery daemon to verify that a remote
2158 node has the expected ip allocation.
2159 This is verified against ctdb->ip_tree
2161 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2163 struct ctdb_public_ip_list *tmp_ip;
2166 if (ctdb->ip_tree == NULL) {
2167 /* dont know the expected allocation yet, assume remote node
2176 for (i=0; i<ips->num; i++) {
2177 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2178 if (tmp_ip == NULL) {
2179 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2183 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2187 if (tmp_ip->pnn != ips->ips[i].pnn) {
2188 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));