4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout passed to the RELEASE_IP/TAKEOVER_IP controls sent from
   ctdb_takeover_run(); built from the takeover_timeout tunable
   (presumably "now + takeover_timeout seconds" - confirm against
   timeval_current_ofs()).
   The macro reads a variable named ctdb from the scope where it is expanded. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* first argument of timeval_current_ofs() when ctdb_control_send_arp() re-arms itself */
33 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() compares arp->count against this value */
34 #define CTDB_ARP_REPEAT 3
/* Per-address state handed to ctdb_control_send_arp() as private_data.
   NOTE(review): only some members are visible in this listing; the code
   in this file also uses ->addr, ->vnn and ->count. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* One registered tcp connection, linked through prev/next.
   ctdb_control_tcp_client() adds these to client->tcp_list and
   ctdb_takeover_client_destructor_hook() unlinks them again. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* One client/address registration, linked through prev/next on
   ctdb->client_ip_list (added in ctdb_control_tcp_client(), removed by
   ctdb_client_ip_destructor()).
   NOTE(review): ->addr and ->client_id are used elsewhere in this file
   but are not visible in this listing. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/*
  Timed-event handler that announces a taken-over address.
  private_data is the struct ctdb_takeover_arp for that address.

  Each run sends an ARP for arp->addr on arp->vnn->iface through
  ctdb_sys_send_arp(), then walks arp->tcparray and sends a tcp tickle
  ack for every recorded connection; both kinds of failure are logged
  at DEBUG_CRIT.  The handler then compares arp->count with
  CTDB_ARP_REPEAT and re-arms itself on arp->vnn->takeover_ctx.

  NOTE(review): the update of arp->count and the body of the
  CTDB_ARP_REPEAT branch are not visible in this listing; presumably the
  branch stops the repetition - confirm.
 */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
/* State shared with the takeip/releaseip event-script callbacks:
   c    - the control request that is answered when the script finishes
   addr - copy of the public address being taken over or released
   vnn  - vnn entry used by takeover_ip_callback() */
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/*
  Completion callback for the event script started by
  ctdb_control_takeover_ip(); private_data is the takeover_callback_state.

  Failure branch: logs "Failed to takeover IP" and answers the control
  with the script status.
  Success path: creates state->vnn->takeover_ctx if it does not exist
  yet, allocates a zeroed ctdb_takeover_arp on it, moves
  state->vnn->tcp_array into the arp structure (talloc_steal), clears the
  vnn pointer and sets tcp_update_needed, schedules
  ctdb_control_send_arp() with timeval_zero() and answers the control
  with status 0.
  The reply with -1 is presumably the exit reached through
  "goto failed" - confirm.

  NOTE(review): the conditions guarding these branches, the body of the
  -ETIME check and any cleanup of state are not visible in this listing.
 */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 if (status == -ETIME) {
134 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135 ctdb_addr_to_str(state->addr),
137 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
142 if (!state->vnn->takeover_ctx) {
143 state->vnn->takeover_ctx = talloc_new(state->vnn);
144 if (!state->vnn->takeover_ctx) {
149 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150 if (!arp) goto failed;
153 arp->addr = *state->addr;
154 arp->vnn = state->vnn;
156 tcparray = state->vnn->tcp_array;
158 /* add all of the known tcp connections for this IP to the
159 list of tcp connections to send tickle acks for */
160 arp->tcparray = talloc_steal(arp, tcparray);
162 state->vnn->tcp_array = NULL;
163 state->vnn->tcp_update_needed = true;
166 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
167 timeval_zero(), ctdb_control_send_arp, arp);
169 /* the control succeeded */
170 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
175 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
181 Find the vnn of the node that has a public ip address
182 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each vnn->public_address with
   addr using ctdb_same_ip().
   NOTE(review): the return statements are not visible in this listing;
   presumably the matching vnn is returned, or NULL when nothing matches. */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
186 struct ctdb_vnn *vnn;
188 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189 if (ctdb_same_ip(&vnn->public_address, addr)) {
199 take over an ip address
/*
  Handle a takeover-IP control.  indata.dptr is read as a
  struct ctdb_public_ip.

  The vnn for pip->addr is looked up (a miss is logged as "not a public
  address"), ctdb_sys_have_ip() is consulted for the "kernel already has
  this IP" case, and otherwise a takeover_callback_state is set up with
  the request c and a copy of the address before the event script is
  started through ctdb_event_script_callback(), with
  takeover_ip_callback() as completion handler.  The control is answered
  asynchronously from that callback.

  NOTE(review): state is allocated on vnn, but state->c and state->addr
  are parented to ctdb (talloc_steal(ctdb, c) / talloc(ctdb, ...)),
  whereas ctdb_control_release_ip() parents both to state.  Unless code
  not visible here frees them explicitly they outlive state - confirm.
  NOTE(review): return values and several conditions are not visible in
  this listing.
 */
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
202 struct ctdb_req_control *c,
207 struct takeover_callback_state *state;
208 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209 struct ctdb_vnn *vnn;
211 /* update our vnn list */
212 vnn = find_public_ip_vnn(ctdb, &pip->addr);
214 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
215 ctdb_addr_to_str(&pip->addr)));
220 /* if our kernel already has this IP, do nothing */
221 if (ctdb_sys_have_ip(&pip->addr)) {
225 state = talloc(vnn, struct takeover_callback_state);
226 CTDB_NO_MEMORY(ctdb, state);
228 state->c = talloc_steal(ctdb, c);
229 state->addr = talloc(ctdb, ctdb_sock_addr);
230 CTDB_NO_MEMORY(ctdb, state->addr);
232 *state->addr = pip->addr;
235 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
236 ctdb_addr_to_str(&pip->addr),
237 vnn->public_netmask_bits,
240 ret = ctdb_event_script_callback(ctdb,
241 state, takeover_ip_callback, state,
246 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247 vnn->public_netmask_bits);
250 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251 ctdb_addr_to_str(&pip->addr),
257 /* tell ctdb_control.c that we will be replying asynchronously */
264 takeover an ip address old v4 style
/*
  Old IPv4-style entry point: allocates a zeroed struct ctdb_public_ip on
  c, copies indata into it and forwards to ctdb_control_takeover_ip().

  NOTE(review): the memcpy length is indata.dsize and no check that it
  fits into sizeof(struct ctdb_public_ip) is visible here - confirm that
  the size is validated before this function is reached.
 */
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
267 struct ctdb_req_control *c,
273 data.dsize = sizeof(struct ctdb_public_ip);
274 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275 CTDB_NO_MEMORY(ctdb, data.dptr);
277 memcpy(data.dptr, indata.dptr, indata.dsize);
278 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
282 kill any clients that are registered with an IP that is being released
/*
  Walks ctdb->client_ip_list and, for each entry whose address matches
  addr according to ctdb_same_ip(), looks up the owning ctdb_client with
  ctdb_reqid_find() and sends SIGKILL to its pid when the pid is non-zero.

  NOTE(review): the comparison uses the local tmp_addr; its assignment
  and any NULL check on client are not visible in this listing.
 */
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
286 struct ctdb_client_ip *ip;
288 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289 ctdb_addr_to_str(addr)));
291 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292 ctdb_sock_addr tmp_addr;
295 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
297 ctdb_addr_to_str(&ip->addr)));
299 if (ctdb_same_ip(&tmp_addr, addr)) {
300 struct ctdb_client *client = ctdb_reqid_find(ctdb,
303 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
305 ctdb_addr_to_str(&ip->addr),
308 if (client->pid != 0) {
309 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310 (unsigned)client->pid,
311 ctdb_addr_to_str(addr),
313 kill(client->pid, SIGKILL);
320 called when releaseip event finishes
/*
  Completion callback for the event script started by
  ctdb_control_release_ip(); private_data is the takeover_callback_state.

  Builds the textual form of state->addr (talloc_strdup on state,
  length including the terminating NUL), sends it to this node
  (ctdb->pnn) as a CTDB_SRVID_RELEASE_IP message, kills the clients
  registered for the address with release_kill_clients() and answers
  the control with status 0.

  NOTE(review): the body of the -ETIME check and any cleanup of state
  are not visible in this listing.
 */
322 static void release_ip_callback(struct ctdb_context *ctdb, int status,
325 struct takeover_callback_state *state =
326 talloc_get_type(private_data, struct takeover_callback_state);
329 if (status == -ETIME) {
333 /* send a message to all clients of this node telling them
334 that the cluster has been reconfigured and they should
335 release any sockets on this IP */
336 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
338 data.dsize = strlen((char *)data.dptr)+1;
340 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
342 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
344 /* kill clients that have registered with this IP */
345 release_kill_clients(ctdb, state->addr);
347 /* the control succeeded */
348 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
353 release an ip address
/*
  Handle a release-IP control.  indata.dptr is read as a
  struct ctdb_public_ip.

  The vnn for pip->addr is looked up (a miss is logged as "not a public
  address").  Any ARP/tickle sender still pending for the address is
  cancelled by freeing vnn->takeover_ctx.  If ctdb_sys_have_ip() reports
  that the address is not held, the release is logged as redundant.
  Otherwise a takeover_callback_state is allocated on ctdb, the request
  c and a copy of the address are parented to that state, and the
  CTDB_EVENT_RELEASE_IP script is started through
  ctdb_event_script_callback() with release_ip_callback() as completion
  handler; the control is answered asynchronously from there.

  NOTE(review): return values and several conditions are not visible in
  this listing.
 */
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
356 struct ctdb_req_control *c,
361 struct takeover_callback_state *state;
362 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363 struct ctdb_vnn *vnn;
365 /* update our vnn list */
366 vnn = find_public_ip_vnn(ctdb, &pip->addr);
368 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369 ctdb_addr_to_str(&pip->addr)));
374 /* stop any previous arps */
375 talloc_free(vnn->takeover_ctx);
376 vnn->takeover_ctx = NULL;
378 if (!ctdb_sys_have_ip(&pip->addr)) {
379 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
380 ctdb_addr_to_str(&pip->addr),
381 vnn->public_netmask_bits,
386 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
387 ctdb_addr_to_str(&pip->addr),
388 vnn->public_netmask_bits,
392 state = talloc(ctdb, struct takeover_callback_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(state, c);
396 state->addr = talloc(state, ctdb_sock_addr);
397 CTDB_NO_MEMORY(ctdb, state->addr);
398 *state->addr = pip->addr;
401 ret = ctdb_event_script_callback(ctdb,
402 state, release_ip_callback, state,
404 CTDB_EVENT_RELEASE_IP,
407 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411 ctdb_addr_to_str(&pip->addr),
417 /* tell the control that we will be replying asynchronously */
423 release an ip address old v4 style
/*
  Old IPv4-style entry point: allocates a zeroed struct ctdb_public_ip on
  c, copies indata into it and forwards to ctdb_control_release_ip().

  NOTE(review): the memcpy length is indata.dsize and no check that it
  fits into sizeof(struct ctdb_public_ip) is visible here - confirm that
  the size is validated before this function is reached.
 */
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
426 struct ctdb_req_control *c,
432 data.dsize = sizeof(struct ctdb_public_ip);
433 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434 CTDB_NO_MEMORY(ctdb, data.dptr);
436 memcpy(data.dptr, indata.dptr, indata.dsize);
437 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Register one public address.
  An address that ctdb_same_sockaddr() already finds on ctdb->vnn is
  reported at DEBUG_CRIT as a duplicate.  Otherwise a zeroed ctdb_vnn is
  allocated on ctdb, iface is copied onto it with talloc_strdup(), the
  address and the netmask bits are stored, and the vnn is linked onto
  ctdb->vnn.
  ctdb_set_public_addresses() treats a non-zero return as failure.

  NOTE(review): the return statements are not visible in this listing.
 */
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
443 struct ctdb_vnn *vnn;
445 /* Verify that we dont have an entry for this ip yet */
446 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
449 ctdb_addr_to_str(addr)));
454 /* create a new vnn structure for this ip address */
455 vnn = talloc_zero(ctdb, struct ctdb_vnn);
456 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457 vnn->iface = talloc_strdup(vnn, iface);
458 CTDB_NO_MEMORY(ctdb, vnn->iface);
459 vnn->public_address = *addr;
460 vnn->public_netmask_bits = mask;
463 DLIST_ADD(ctdb->vnn, vnn);
470 setup the event script directory
/* Stores a talloc copy of script_dir (parented to ctdb) in
   ctdb->event_script_dir; the allocation is checked with CTDB_NO_MEMORY.
   NOTE(review): the return statement is not visible in this listing. */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
474 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
480 setup the public address lists from a file
/*
  Load the public address list from the file alist.

  The file is read with file_lines_load(); a load failure is recorded
  with ctdb_set_error().  Trailing empty lines are not counted.  For each
  remaining line, leading blanks and tabs are skipped, empty lines are
  ignored, and the line is split on blanks/tabs with strtok().  The
  interface falls back to ctdb->default_public_interface (an unset
  default is reported at DEBUG_CRIT).  The address is parsed with
  parse_ip_mask() and registered with ctdb_add_public_address(); either
  failing is reported at DEBUG_CRIT with the 1-based line number.

  NOTE(review): declarations, the assignment of addrstr, the condition
  selecting the default interface and all return statements are not
  visible in this listing.
 */
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
488 lines = file_lines_load(alist, &nlines, ctdb);
490 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
493 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
497 for (i=0;i<nlines;i++) {
505 while ((*line == ' ') || (*line == '\t')) {
511 if (strcmp(line, "") == 0) {
514 tok = strtok(line, " \t");
516 tok = strtok(NULL, " \t");
518 if (NULL == ctdb->default_public_interface) {
519 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
524 iface = ctdb->default_public_interface;
529 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
534 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked element of the merged, cluster-wide address list built
   by create_merged_ip_list().
   NOTE(review): the ->pnn and ->addr members used throughout this file
   are not visible in this listing. */
548 struct ctdb_public_ip_list {
549 struct ctdb_public_ip_list *next;
/* Walks the ips list and tests each element's ->pnn against pnn.
   NOTE(review): the counter and the return statement are not visible in
   this listing; callers store the result as the number of addresses
   currently assigned to that node. */
555 /* Given a physical node, return the number of
556 public addresses that is currently assigned to this node.
558 static int node_ip_coverage(struct ctdb_context *ctdb,
560 struct ctdb_public_ip_list *ips)
564 for (;ips;ips=ips->next) {
565 if (ips->pnn == pnn) {
/* Looks for ip->addr (ctdb_same_ip) in ctdb->nodes[pnn]->public_ips; a
   node without a public_ips array is handled separately.
   Return convention as used by every caller in this file: 0 means the
   node CAN serve the address, non-zero means it cannot - i.e. the
   result must not be read as a boolean "can serve".
   NOTE(review): the return statements are not visible in this listing. */
573 /* Check if this is a public ip known to the node, i.e. can that
574 node takeover this ip ?
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
577 struct ctdb_public_ip_list *ip)
579 struct ctdb_all_public_ips *public_ips;
582 public_ips = ctdb->nodes[pnn]->public_ips;
584 if (public_ips == NULL) {
588 for (i=0;i<public_ips->num;i++) {
589 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590 /* yes, this node can serve this public ip */
/* Chooses a node for ip among the nodes of nodemap.  Nodes whose flags
   intersect mask are skipped, as are nodes for which can_node_serve_ip()
   returns non-zero; node_ip_coverage() supplies the number of addresses
   each remaining candidate already holds.  When no node qualifies a
   warning is logged.  ctdb_takeover_run() treats a non-zero return as
   "no node found".
   NOTE(review): the bookkeeping of the best candidate, the assignment
   to ip->pnn and the return statements are not visible in this listing. */
599 /* search the node lists list for a node to takeover this ip.
600 pick the node that currently are serving the least number of ips
601 so that the ips get spread out evenly.
603 static int find_takeover_node(struct ctdb_context *ctdb,
604 struct ctdb_node_map *nodemap, uint32_t mask,
605 struct ctdb_public_ip_list *ip,
606 struct ctdb_public_ip_list *all_ips)
612 for (i=0;i<nodemap->num;i++) {
613 if (nodemap->nodes[i].flags & mask) {
614 /* This node is not healthy and can not be used to serve
620 /* verify that this node can serve this ip */
621 if (can_node_serve_ip(ctdb, i, ip)) {
622 /* no it couldnt so skip to the next node */
626 num = node_ip_coverage(ctdb, i, all_ips);
627 /* was this the first node we checked ? */
639 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640 ctdb_addr_to_str(&ip->addr)));
/*
  Build the IP_KEYLEN-word key that identifies an address in the ip tree
  (see create_merged_ip_list()).

  The key is kept in a function-static buffer that is zeroed on every
  call, so a key obtained earlier is overwritten by the next call.
  Depending on ip->sa.sa_family either key[3] is filled from the IPv4
  address or key[0..3] from the four 32-bit words of the IPv6 address,
  each converted with htonl(); an unknown family is logged at DEBUG_ERR.

  NOTE(review): the case labels and the return statement are not visible
  in this listing; presumably the static buffer is what gets returned.
 */
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
652 static uint32_t key[IP_KEYLEN];
654 bzero(key, sizeof(key));
656 switch (ip->sa.sa_family) {
658 key[3] = htonl(ip->ip.sin_addr.s_addr);
661 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
667 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* Callback taking (parm, data) and returning a pointer.
   NOTE(review): the body is not visible in this listing; presumably this
   is the callback handed to trbt_insertarray32_callback() in
   create_merged_ip_list() - confirm. */
674 static void *add_ip_callback(void *parm, void *data)
/* Traversal callback used by create_merged_ip_list(): param points to
   the head pointer of a ctdb_public_ip_list, data is the element being
   visited.  The element's next pointer is set to the current head.
   NOTE(review): the update of the head pointer itself is not visible in
   this listing.  The function is not declared static. */
679 void getips_count_callback(void *param, void *data)
681 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
684 new_ip->next = *ip_list;
/*
  Build one list holding the public addresses known by all nodes.

  For every node in ctdb->nodes that is not flagged NODE_FLAGS_DELETED
  and that has a public_ips array, each entry is copied (pnn and addr)
  into a new zeroed ctdb_public_ip_list element allocated on tmp_ctx and
  inserted into a tree (also created on tmp_ctx) under the key produced
  by ip_key() for the address.  The tree is finally traversed with
  getips_count_callback() to collect the elements into ip_list.

  NOTE(review): the initialisation of ip_list, the remaining arguments of
  the insert call and the return statement are not visible in this
  listing.
 */
688 struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
692 struct ctdb_public_ip_list *ip_list;
693 struct ctdb_all_public_ips *public_ips;
694 trbt_tree_t *ip_tree;
696 ip_tree = trbt_create(tmp_ctx, 0);
698 for (i=0;i<ctdb->num_nodes;i++) {
699 public_ips = ctdb->nodes[i]->public_ips;
701 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
705 /* there were no public ips for this node */
706 if (public_ips == NULL) {
710 for (j=0;j<public_ips->num;j++) {
711 struct ctdb_public_ip_list *tmp_ip;
713 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
714 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
715 tmp_ip->pnn = public_ips->ips[j].pnn;
716 tmp_ip->addr = public_ips->ips[j].addr;
719 trbt_insertarray32_callback(ip_tree,
720 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
727 trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
733 make any IP alias changes for public addresses that are necessary
/*
  Recompute which node should hold each public address and push the
  result to the cluster.  Steps, as far as they are visible here:

   1. count the nodes that are neither INACTIVE nor DISABLED; if there
      are any, nodes carrying either flag are excluded (mask), otherwise
      only INACTIVE nodes are excluded so that disabled nodes serve as a
      fallback
   2. build the merged list of all public addresses with
      create_merged_ip_list()
   3. with the deterministic_public_ips tunable set to 1, reset every
      address to node (list index % nodemap->num)
   4. addresses whose node is masked, or whose node cannot serve them
      according to can_node_serve_ip(), are treated as unassigned (-1);
      unassigned addresses get a node from find_takeover_node()
   5. unless the no_ip_failback or deterministic_public_ips tunables
      prevent it, rebalance: when the node with the most addresses holds
      more than one address more than the node with the fewest, one of
      its addresses is taken away for reassignment
   6. send RELEASE_IP (RELEASE_IPv4 for AF_INET addresses) to every node
      that is not DISCONNECTED for each address it should not hold, wait
      for the replies, then send TAKEOVER_IP (TAKEOVER_IPv4 for AF_INET)
      to the owner of each assigned address and wait again

  Temporary allocations hang off tmp_ctx, which is freed on the visible
  error paths and at the end.

  NOTE(review): the return statements, several conditions, and the
  assignments made inside the branches are not visible in this listing.
 */
735 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
737 int i, num_healthy, retries;
738 struct ctdb_public_ip ip;
739 struct ctdb_public_ipv4 ipv4;
741 struct ctdb_public_ip_list *all_ips, *tmp_ip;
742 int maxnode, maxnum=0, minnode, minnum=0, num;
744 struct timeval timeout;
745 struct client_async_data *async_data;
746 struct ctdb_client_control_state *state;
747 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
752 /* Count how many completely healthy nodes we have */
754 for (i=0;i<nodemap->num;i++) {
755 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
760 if (num_healthy > 0) {
761 /* We have healthy nodes, so only consider them for
762 serving public addresses
764 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
766 /* We didnt have any completely healthy nodes so
767 use "disabled" nodes as a fallback
769 mask = NODE_FLAGS_INACTIVE;
772 /* since nodes only know about those public addresses that
773 can be served by that particular node, no single node has
774 a full list of all public addresses that exist in the cluster.
775 Walk over all node structures and create a merged list of
776 all public addresses that exist in the cluster.
778 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
780 /* If we want deterministic ip allocations, i.e. that the ip addresses
781 will always be allocated the same way for a specific set of
782 available/unavailable nodes.
784 if (1 == ctdb->tunable.deterministic_public_ips) {
785 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
786 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
787 tmp_ip->pnn = i%nodemap->num;
792 /* mark all public addresses with a masked node as being served by
795 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
796 if (tmp_ip->pnn == -1) {
799 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
804 /* verify that the assigned nodes can serve that public ip
805 and set it to -1 if not
807 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
808 if (tmp_ip->pnn == -1) {
811 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
812 /* this node can not serve this ip. */
818 /* now we must redistribute all public addresses with takeover node
819 -1 among the nodes available
823 /* loop over all ip's and find a physical node to cover for
826 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
827 if (tmp_ip->pnn == -1) {
828 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
829 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
830 ctdb_addr_to_str(&tmp_ip->addr)));
835 /* If we dont want ips to fail back after a node becomes healthy
836 again, we wont even try to reallocate the ip addresses so that
837 they are evenly spread out.
838 This can NOT be used at the same time as DeterministicIPs !
840 if (1 == ctdb->tunable.no_ip_failback) {
841 if (1 == ctdb->tunable.deterministic_public_ips) {
842 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
848 /* now, try to make sure the ip addresses are evenly distributed
850 for each ip address, loop over all nodes that can serve this
851 ip and make sure that the difference between the node
852 serving the most and the node serving the least ip's are not greater
855 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
856 if (tmp_ip->pnn == -1) {
860 /* Get the highest and lowest number of ips's served by any
861 valid node which can serve this ip.
865 for (i=0;i<nodemap->num;i++) {
866 if (nodemap->nodes[i].flags & mask) {
870 /* only check nodes that can actually serve this ip */
871 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
872 /* no it couldnt so skip to the next node */
876 num = node_ip_coverage(ctdb, i, all_ips);
897 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
898 ctdb_addr_to_str(&tmp_ip->addr)));
903 /* If we want deterministic IPs then dont try to reallocate
904 them to spread out the load.
906 if (1 == ctdb->tunable.deterministic_public_ips) {
910 /* if the spread between the smallest and largest coverage by
911 a node is >=2 we steal one of the ips from the node with
912 most coverage to even things out a bit.
913 try to do this at most 5 times since we dont want to spend
914 too much time balancing the ip coverage.
916 if ( (maxnum > minnum+1)
918 struct ctdb_public_ip_list *tmp;
920 /* mark one of maxnode's vnn's as unassigned and try
923 for (tmp=all_ips;tmp;tmp=tmp->next) {
924 if (tmp->pnn == maxnode) {
934 /* finished distributing the public addresses, now just send the
935 info out to the nodes
939 /* at this point ->pnn is the node which will own each IP
940 or -1 if there is no node that can cover this ip
943 /* now tell all nodes to delete any alias that they should not
944 have. This will be a NOOP on nodes that don't currently
945 hold the given alias */
946 async_data = talloc_zero(tmp_ctx, struct client_async_data);
947 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
949 for (i=0;i<nodemap->num;i++) {
950 /* don't talk to unconnected nodes, but do talk to banned nodes */
951 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
955 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
956 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
957 /* This node should be serving this
958 vnn so dont tell it to release the ip
962 if (tmp_ip->addr.sa.sa_family == AF_INET) {
963 ipv4.pnn = tmp_ip->pnn;
964 ipv4.sin = tmp_ip->addr.ip;
966 timeout = TAKEOVER_TIMEOUT();
967 data.dsize = sizeof(ipv4);
968 data.dptr = (uint8_t *)&ipv4;
969 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
970 0, CTDB_CONTROL_RELEASE_IPv4, 0,
974 ip.pnn = tmp_ip->pnn;
975 ip.addr = tmp_ip->addr;
977 timeout = TAKEOVER_TIMEOUT();
978 data.dsize = sizeof(ip);
979 data.dptr = (uint8_t *)&ip;
980 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
981 0, CTDB_CONTROL_RELEASE_IP, 0,
987 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
988 talloc_free(tmp_ctx);
992 ctdb_client_async_add(async_data, state);
995 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
996 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
997 talloc_free(tmp_ctx);
1000 talloc_free(async_data);
1003 /* tell all nodes to get their own IPs */
1004 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1005 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1006 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1007 if (tmp_ip->pnn == -1) {
1008 /* this IP won't be taken over */
1012 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1013 ipv4.pnn = tmp_ip->pnn;
1014 ipv4.sin = tmp_ip->addr.ip;
1016 timeout = TAKEOVER_TIMEOUT();
1017 data.dsize = sizeof(ipv4);
1018 data.dptr = (uint8_t *)&ipv4;
1019 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1020 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1024 ip.pnn = tmp_ip->pnn;
1025 ip.addr = tmp_ip->addr;
1027 timeout = TAKEOVER_TIMEOUT();
1028 data.dsize = sizeof(ip);
1029 data.dptr = (uint8_t *)&ip;
1030 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1031 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1035 if (state == NULL) {
1036 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1037 talloc_free(tmp_ctx);
1041 ctdb_client_async_add(async_data, state);
1043 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1044 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1045 talloc_free(tmp_ctx);
1049 talloc_free(tmp_ctx);
1055 destroy a ctdb_client_ip structure
/* talloc destructor installed by ctdb_control_tcp_client(): logs the
   address, port and client id at DEBUG_DEBUG and unlinks the entry from
   ip->ctdb->client_ip_list.
   NOTE(review): the return statement is not visible in this listing. */
1057 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1059 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1060 ctdb_addr_to_str(&ip->addr),
1061 ntohs(ip->addr.ip.sin_port),
1064 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1069 called by a client to inform us of a TCP connection that it is managing
1070 that should be tickled with an ACK when IP takeover is done.
1071 we handle both the old ipv4 style of packets as well as the new ipv4/6 style
/*
  Handle a client's registration of a tcp connection.

  indata is accepted in two layouts, selected by indata.dsize: the old
  struct ctdb_control_tcp (converted into a zeroed
  struct ctdb_control_tcp_addr) or struct ctdb_control_tcp_addr itself;
  any other size is logged as invalid.
  Source and destination are passed through ctdb_canonicalize_ip() and
  the destination is looked up with find_public_ip_vnn().  A registration
  for an address this node does not hold (vnn->pnn != ctdb->pnn) is
  rejected; per the inline comment, failing the call makes smbd die.
  On success a ctdb_client_ip (with ctdb_client_ip_destructor) is linked
  onto ctdb->client_ip_list, a ctdb_tcp_list entry is linked onto
  client->tcp_list, and CTDB_CONTROL_TCP_ADD is broadcast to all
  connected nodes without asking for a reply.

  NOTE(review): client is dereferenced (client->pid, client->tcp_list)
  and no NULL check on the ctdb_reqid_find() result is visible here.
  NOTE(review): return values, case labels and several conditions are
  not visible in this listing.
 */
1074 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1077 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1078 struct ctdb_control_tcp *old_addr = NULL;
1079 struct ctdb_control_tcp_addr new_addr;
1080 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1081 struct ctdb_tcp_list *tcp;
1082 struct ctdb_control_tcp_vnn t;
1085 struct ctdb_client_ip *ip;
1086 struct ctdb_vnn *vnn;
1087 ctdb_sock_addr addr;
1089 switch (indata.dsize) {
1090 case sizeof(struct ctdb_control_tcp):
1091 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1092 ZERO_STRUCT(new_addr);
1093 tcp_sock = &new_addr;
1094 tcp_sock->src.ip = old_addr->src;
1095 tcp_sock->dest.ip = old_addr->dest;
1097 case sizeof(struct ctdb_control_tcp_addr):
1098 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1101 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1102 "to ctdb_control_tcp_client. size was %d but "
1103 "only allowed sizes are %lu and %lu\n",
1105 (long unsigned)sizeof(struct ctdb_control_tcp),
1106 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1110 addr = tcp_sock->src;
1111 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1112 addr = tcp_sock->dest;
1113 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1116 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1117 vnn = find_public_ip_vnn(ctdb, &addr);
1119 switch (addr.sa.sa_family) {
1121 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1122 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1123 ctdb_addr_to_str(&addr)));
1127 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1128 ctdb_addr_to_str(&addr)));
1131 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1137 if (vnn->pnn != ctdb->pnn) {
1138 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1139 ctdb_addr_to_str(&addr),
1140 client_id, client->pid));
1141 /* failing this call will tell smbd to die */
1145 ip = talloc(client, struct ctdb_client_ip);
1146 CTDB_NO_MEMORY(ctdb, ip);
1150 ip->client_id = client_id;
1151 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1152 DLIST_ADD(ctdb->client_ip_list, ip);
1154 tcp = talloc(client, struct ctdb_tcp_list);
1155 CTDB_NO_MEMORY(ctdb, tcp);
1157 tcp->connection.src_addr = tcp_sock->src;
1158 tcp->connection.dst_addr = tcp_sock->dest;
1160 DLIST_ADD(client->tcp_list, tcp);
1162 t.src = tcp_sock->src;
1163 t.dest = tcp_sock->dest;
1165 data.dptr = (uint8_t *)&t;
1166 data.dsize = sizeof(t);
1168 switch (addr.sa.sa_family) {
1170 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1171 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1172 ctdb_addr_to_str(&tcp_sock->src),
1173 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1176 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1177 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1178 ctdb_addr_to_str(&tcp_sock->src),
1179 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1182 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1186 /* tell all nodes about this tcp connection */
1187 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1188 CTDB_CONTROL_TCP_ADD,
1189 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1191 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1199 find a tcp address on a list
/* Linear search of array for an entry whose source AND destination both
   match tcp according to ctdb_same_sockaddr(); the address of the
   matching array element is returned.  A NULL array is handled up front.
   NOTE(review): the return values for the NULL-array and not-found cases
   are not visible in this listing; callers compare the result with NULL. */
1201 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1202 struct ctdb_tcp_connection *tcp)
1206 if (array == NULL) {
1210 for (i=0;i<array->num;i++) {
1211 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1212 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1213 return &array->connections[i];
1220 called by a daemon to inform us of a TCP connection that one of its
1221 clients is managing and that should be tickled with an ACK when IP takeover is done
/*
  Record a tcp connection (indata is read as a
  struct ctdb_control_tcp_vnn) in the tickle array of the public address
  it is connected to (p->dest).

  A destination that is not a public address is only logged.  The first
  connection for a vnn allocates the ctdb_tcp_array and its connections
  array.  A connection that ctdb_tcp_find() already knows is logged as
  "Already had tickle info".  Otherwise the connections array is resized
  with talloc_realloc() and the new src/dst pair is stored at index
  tcparray->num.

  NOTE(review): the array is allocated with ctdb->nodes as talloc parent
  rather than the vnn; the initialisation and updates of ->num, the new
  element count passed to talloc_realloc() and the return statements are
  not visible in this listing - confirm.
 */
1224 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1226 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1227 struct ctdb_tcp_array *tcparray;
1228 struct ctdb_tcp_connection tcp;
1229 struct ctdb_vnn *vnn;
1231 vnn = find_public_ip_vnn(ctdb, &p->dest);
1233 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1234 ctdb_addr_to_str(&p->dest)));
1240 tcparray = vnn->tcp_array;
1242 /* If this is the first tickle */
1243 if (tcparray == NULL) {
1244 tcparray = talloc_size(ctdb->nodes,
1245 offsetof(struct ctdb_tcp_array, connections) +
1246 sizeof(struct ctdb_tcp_connection) * 1);
1247 CTDB_NO_MEMORY(ctdb, tcparray);
1248 vnn->tcp_array = tcparray;
1251 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1252 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1254 tcparray->connections[tcparray->num].src_addr = p->src;
1255 tcparray->connections[tcparray->num].dst_addr = p->dest;
1261 /* Do we already have this tickle ?*/
1262 tcp.src_addr = p->src;
1263 tcp.dst_addr = p->dest;
1264 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1265 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1266 ctdb_addr_to_str(&tcp.dst_addr),
1267 ntohs(tcp.dst_addr.ip.sin_port),
1272 /* A new tickle, we must add it to the array */
1273 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1274 struct ctdb_tcp_connection,
1276 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1278 vnn->tcp_array = tcparray;
1279 tcparray->connections[tcparray->num].src_addr = p->src;
1280 tcparray->connections[tcparray->num].dst_addr = p->dest;
1283 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1284 ctdb_addr_to_str(&tcp.dst_addr),
1285 ntohs(tcp.dst_addr.ip.sin_port),
1293 called to remove a TCP connection, previously registered to be tickled with an
1294 ACK when IP takeover is done, from the tickle list of its public address
/*
  Remove conn from the tickle array of the public address it is
  connected to (conn->dst_addr).

  An unknown public address is logged at DEBUG_ERR; an empty array or an
  unknown connection is logged at DEBUG_INFO.  A known entry is removed
  by overwriting it with the last element of the array and decrementing
  ->num; when the count reaches zero the whole array is freed and
  vnn->tcp_array is cleared.  vnn->tcp_update_needed is set to true.

  NOTE(review): the conditions and early returns around the log messages
  are not visible in this listing.
 */
1297 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1299 struct ctdb_tcp_connection *tcpp;
1300 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1303 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1304 ctdb_addr_to_str(&conn->dst_addr)));
1308 /* if the array is empty we cant remove it
1309 and we dont need to do anything
1311 if (vnn->tcp_array == NULL) {
1312 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1313 ctdb_addr_to_str(&conn->dst_addr),
1314 ntohs(conn->dst_addr.ip.sin_port)));
1319 /* See if we know this connection
1320 if we dont know this connection then we dont need to do anything
1322 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1324 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1325 ctdb_addr_to_str(&conn->dst_addr),
1326 ntohs(conn->dst_addr.ip.sin_port)));
1331 /* We need to remove this entry from the array.
1332 Instead of allocating a new array and copying data to it
1333 we cheat and just copy the last entry in the existing array
1334 to the entry that is to be removed and just shring the
1337 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1338 vnn->tcp_array->num--;
1340 /* If we deleted the last entry we also need to remove the entire array
1342 if (vnn->tcp_array->num == 0) {
1343 talloc_free(vnn->tcp_array);
1344 vnn->tcp_array = NULL;
1347 vnn->tcp_update_needed = true;
1349 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1350 ctdb_addr_to_str(&conn->src_addr),
1351 ntohs(conn->src_addr.ip.sin_port)));
1356 called when a daemon restarts - send all tickles for all public addresses
1357 we are serving immediately to the new node.
1359 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1361 /*XXX here we should send all tickles we are serving to the new node.
        NOTE(review): no code sending tickles is visible in this listing, and
        the return statement is not visible either; the function appears to be
        a placeholder - confirm. */
1367 called when a client structure goes away - hook to remove
1368 elements from the tcp_list in all daemons
/* Drains client->tcp_list: each entry is unlinked from the list and its
   connection is passed to ctdb_remove_tcp_connection() so that it is
   dropped from the tickle array of its public address. */
1370 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1372 while (client->tcp_list) {
1373 struct ctdb_tcp_list *tcp = client->tcp_list;
1374 DLIST_REMOVE(client->tcp_list, tcp);
1375 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1381 release all IPs on shutdown
/*
  Walks ctdb->vnn; addresses that ctdb_sys_have_ip() reports as not held
  are handled by the first branch.  For the others the
  CTDB_EVENT_RELEASE_IP event script is run through
  ctdb_event_script_args() with a "%s %s %u" argument string (the
  visible arguments are the address text and the netmask bits), and the
  clients registered for the address are killed with
  release_kill_clients().

  NOTE(review): the bodies of the two if-branches and the first
  format argument are not visible in this listing.  The address string is
  talloc_strdup()ed onto ctdb and no matching free is visible.
 */
1383 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1385 struct ctdb_vnn *vnn;
1387 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1388 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1391 if (vnn->pnn == ctdb->pnn) {
1394 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1396 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1397 vnn->public_netmask_bits);
1398 release_kill_clients(ctdb, &vnn->public_address);
1404 get list of public IPs
1406 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1407 struct ctdb_req_control *c, TDB_DATA *outdata)
1410 struct ctdb_all_public_ips *ips;
1411 struct ctdb_vnn *vnn;
1413 /* count how many public ip structures we have */
1415 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1419 len = offsetof(struct ctdb_all_public_ips, ips) +
1420 num*sizeof(struct ctdb_public_ip);
1421 ips = talloc_zero_size(outdata, len);
1422 CTDB_NO_MEMORY(ctdb, ips);
1424 outdata->dsize = len;
1425 outdata->dptr = (uint8_t *)ips;
1429 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1430 ips->ips[i].pnn = vnn->pnn;
1431 ips->ips[i].addr = vnn->public_address;
1440 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1442 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1443 struct ctdb_req_control *c, TDB_DATA *outdata)
1446 struct ctdb_all_public_ipsv4 *ips;
1447 struct ctdb_vnn *vnn;
1449 /* count how many public ip structures we have */
1451 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1452 if (vnn->public_address.sa.sa_family != AF_INET) {
1458 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1459 num*sizeof(struct ctdb_public_ipv4);
1460 ips = talloc_zero_size(outdata, len);
1461 CTDB_NO_MEMORY(ctdb, ips);
1463 outdata->dsize = len;
1464 outdata->dptr = (uint8_t *)ips;
1468 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1469 if (vnn->public_address.sa.sa_family != AF_INET) {
1472 ips->ips[i].pnn = vnn->pnn;
1473 ips->ips[i].sin = vnn->public_address.ip;
1482 structure containing the listening socket and the list of tcp connections
1483 that the ctdb daemon is to kill
1485 struct ctdb_kill_tcp {
1486 struct ctdb_vnn *vnn;
1487 struct ctdb_context *ctdb;
1489 struct fd_event *fde;
1490 trbt_tree_t *connections;
1495 a tcp connection that is to be killed
1497 struct ctdb_killtcp_con {
1498 ctdb_sock_addr src_addr;
1499 ctdb_sock_addr dst_addr;
1501 struct ctdb_kill_tcp *killtcp;
1504 /* this function is used to create a key to represent this socketpair
1505 in the killtcp tree.
1506 this key is used to insert and lookup matching socketpairs that are
1507 to be tickled and RST
1509 #define KILLTCP_KEYLEN 10
1510 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1512 static uint32_t key[KILLTCP_KEYLEN];
1514 bzero(key, sizeof(key));
1516 if (src->sa.sa_family != dst->sa.sa_family) {
1517 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1521 switch (src->sa.sa_family) {
1523 key[0] = dst->ip.sin_addr.s_addr;
1524 key[1] = src->ip.sin_addr.s_addr;
1525 key[2] = dst->ip.sin_port;
1526 key[3] = src->ip.sin_port;
1529 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1530 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1531 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1532 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1533 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1534 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1535 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1536 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1537 key[8] = dst->ip6.sin6_port;
1538 key[9] = src->ip6.sin6_port;
1541 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1549 called when we get a read event on the raw socket
1551 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1552 uint16_t flags, void *private_data)
1554 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1555 struct ctdb_killtcp_con *con;
1556 ctdb_sock_addr src, dst;
1557 uint32_t ack_seq, seq;
1559 if (!(flags & EVENT_FD_READ)) {
1563 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1564 killtcp->private_data,
1566 &ack_seq, &seq) != 0) {
1567 /* probably a non-tcp ACK packet */
1571 /* check if we have this guy in our list of connections
1574 con = trbt_lookuparray32(killtcp->connections,
1575 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1577 /* no this was some other packet we can just ignore */
1581 /* This one has been tickled !
1582 now reset him and remove him from the list.
1584 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1585 ntohs(con->dst_addr.ip.sin_port),
1586 ctdb_addr_to_str(&con->src_addr),
1587 ntohs(con->src_addr.ip.sin_port)));
1589 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1594 /* when traversing the list of all tcp connections to send tickle acks to
1595 (so that we can capture the ack coming back and kill the connection
1597 this callback is called for each connection we are currently trying to kill
1599 static void tickle_connection_traverse(void *param, void *data)
1601 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1603 /* have tried too many times, just give up */
1604 if (con->count >= 5) {
1609 /* othervise, try tickling it again */
1612 (ctdb_sock_addr *)&con->dst_addr,
1613 (ctdb_sock_addr *)&con->src_addr,
1619 called every second until all sentenced connections have been reset
1621 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1622 struct timeval t, void *private_data)
1624 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1627 /* loop over all connections sending tickle ACKs */
1628 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1631 /* If there are no more connections to kill we can remove the
1632 entire killtcp structure
1634 if ( (killtcp->connections == NULL) ||
1635 (killtcp->connections->root == NULL) ) {
1636 talloc_free(killtcp);
1640 /* try tickling them again in a seconds time
1642 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1643 ctdb_tickle_sentenced_connections, killtcp);
1647 destroy the killtcp structure
1649 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1651 killtcp->vnn->killtcp = NULL;
/* nothing fancy here, just unconditionally replace any existing
   connection structure with the new one.

   dont even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new data
   is deleted

   parm: the new connection structure to store
   data: the entry currently stored under the key, if any (ignored)
   returns parm
 */
static void *add_killtcp_callback(void *parm, void *data)
{
	return parm;
}
1669 add a tcp socket to the list of connections we want to RST
1671 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1675 ctdb_sock_addr src, dst;
1676 struct ctdb_kill_tcp *killtcp;
1677 struct ctdb_killtcp_con *con;
1678 struct ctdb_vnn *vnn;
1680 ctdb_canonicalize_ip(s, &src);
1681 ctdb_canonicalize_ip(d, &dst);
1683 vnn = find_public_ip_vnn(ctdb, &dst);
1685 vnn = find_public_ip_vnn(ctdb, &src);
1688 /* if it is not a public ip it could be our 'single ip' */
1689 if (ctdb->single_ip_vnn) {
1690 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1691 vnn = ctdb->single_ip_vnn;
1696 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1700 killtcp = vnn->killtcp;
1702 /* If this is the first connection to kill we must allocate
1705 if (killtcp == NULL) {
1706 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1707 CTDB_NO_MEMORY(ctdb, killtcp);
1710 killtcp->ctdb = ctdb;
1711 killtcp->capture_fd = -1;
1712 killtcp->connections = trbt_create(killtcp, 0);
1714 vnn->killtcp = killtcp;
1715 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1720 /* create a structure that describes this connection we want to
1721 RST and store it in killtcp->connections
1723 con = talloc(killtcp, struct ctdb_killtcp_con);
1724 CTDB_NO_MEMORY(ctdb, con);
1725 con->src_addr = src;
1726 con->dst_addr = dst;
1728 con->killtcp = killtcp;
1731 trbt_insertarray32_callback(killtcp->connections,
1732 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1733 add_killtcp_callback, con);
1736 If we dont have a socket to listen on yet we must create it
1738 if (killtcp->capture_fd == -1) {
1739 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1740 if (killtcp->capture_fd == -1) {
1741 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1747 if (killtcp->fde == NULL) {
1748 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1749 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1750 capture_tcp_handler, killtcp);
1752 /* We also need to set up some events to tickle all these connections
1753 until they are all reset
1755 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1756 ctdb_tickle_sentenced_connections, killtcp);
1759 /* tickle him once now */
1768 talloc_free(vnn->killtcp);
1769 vnn->killtcp = NULL;
1774 kill a TCP connection.
1776 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1778 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1780 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1784 called by a daemon to inform us of the entire list of TCP tickles for
1785 a particular public address.
1786 this control should only be sent by the node that is currently serving
1787 that public address.
1789 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1791 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1792 struct ctdb_tcp_array *tcparray;
1793 struct ctdb_vnn *vnn;
1795 /* We must at least have tickles.num or else we cant verify the size
1796 of the received data blob
1798 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1799 tickles.connections)) {
1800 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1804 /* verify that the size of data matches what we expect */
1805 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1806 tickles.connections)
1807 + sizeof(struct ctdb_tcp_connection)
1808 * list->tickles.num) {
1809 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1813 vnn = find_public_ip_vnn(ctdb, &list->addr);
1815 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1816 ctdb_addr_to_str(&list->addr)));
1821 /* remove any old ticklelist we might have */
1822 talloc_free(vnn->tcp_array);
1823 vnn->tcp_array = NULL;
1825 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1826 CTDB_NO_MEMORY(ctdb, tcparray);
1828 tcparray->num = list->tickles.num;
1830 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1831 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1833 memcpy(tcparray->connections, &list->tickles.connections[0],
1834 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1836 /* We now have a new fresh tickle list array for this vnn */
1837 vnn->tcp_array = talloc_steal(vnn, tcparray);
1843 called to return the full list of tickles for the puclic address associated
1844 with the provided vnn
1846 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1848 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1849 struct ctdb_control_tcp_tickle_list *list;
1850 struct ctdb_tcp_array *tcparray;
1852 struct ctdb_vnn *vnn;
1854 vnn = find_public_ip_vnn(ctdb, addr);
1856 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1857 ctdb_addr_to_str(addr)));
1862 tcparray = vnn->tcp_array;
1864 num = tcparray->num;
1869 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1870 tickles.connections)
1871 + sizeof(struct ctdb_tcp_connection) * num;
1873 outdata->dptr = talloc_size(outdata, outdata->dsize);
1874 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1875 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1878 list->tickles.num = num;
1880 memcpy(&list->tickles.connections[0], tcparray->connections,
1881 sizeof(struct ctdb_tcp_connection) * num);
1889 set the list of all tcp tickles for a public address
1891 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1892 struct timeval timeout, uint32_t destnode,
1893 ctdb_sock_addr *addr,
1894 struct ctdb_tcp_array *tcparray)
1898 struct ctdb_control_tcp_tickle_list *list;
1901 num = tcparray->num;
1906 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1907 tickles.connections) +
1908 sizeof(struct ctdb_tcp_connection) * num;
1909 data.dptr = talloc_size(ctdb, data.dsize);
1910 CTDB_NO_MEMORY(ctdb, data.dptr);
1912 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1914 list->tickles.num = num;
1916 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1919 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1920 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1921 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1923 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1927 talloc_free(data.dptr);
1934 perform tickle updates if required
1936 static void ctdb_update_tcp_tickles(struct event_context *ev,
1937 struct timed_event *te,
1938 struct timeval t, void *private_data)
1940 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1942 struct ctdb_vnn *vnn;
1944 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1945 /* we only send out updates for public addresses that
1948 if (ctdb->pnn != vnn->pnn) {
1951 /* We only send out the updates if we need to */
1952 if (!vnn->tcp_update_needed) {
1955 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1957 CTDB_BROADCAST_CONNECTED,
1958 &vnn->public_address,
1961 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1962 ctdb_addr_to_str(&vnn->public_address)));
1966 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1967 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1968 ctdb_update_tcp_tickles, ctdb);
1973 start periodic update of tcp tickles
1975 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1977 ctdb->tickle_update_context = talloc_new(ctdb);
1979 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1980 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1981 ctdb_update_tcp_tickles, ctdb);
1987 struct control_gratious_arp {
1988 struct ctdb_context *ctdb;
1989 ctdb_sock_addr addr;
1995 send a control_gratuitous arp
1997 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1998 struct timeval t, void *private_data)
2001 struct control_gratious_arp *arp = talloc_get_type(private_data,
2002 struct control_gratious_arp);
2004 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2006 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2011 if (arp->count == CTDB_ARP_REPEAT) {
2016 event_add_timed(arp->ctdb->ev, arp,
2017 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2018 send_gratious_arp, arp);
2025 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2027 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2028 struct control_gratious_arp *arp;
2030 /* verify the size of indata */
2031 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2032 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2033 (unsigned)indata.dsize,
2034 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2038 ( offsetof(struct ctdb_control_gratious_arp, iface)
2039 + gratious_arp->len ) ){
2041 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2042 "but should be %u bytes\n",
2043 (unsigned)indata.dsize,
2044 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2049 arp = talloc(ctdb, struct control_gratious_arp);
2050 CTDB_NO_MEMORY(ctdb, arp);
2053 arp->addr = gratious_arp->addr;
2054 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2055 CTDB_NO_MEMORY(ctdb, arp->iface);
2058 event_add_timed(arp->ctdb->ev, arp,
2059 timeval_zero(), send_gratious_arp, arp);
2064 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2066 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2069 /* verify the size of indata */
2070 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2071 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2075 ( offsetof(struct ctdb_control_ip_iface, iface)
2078 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2079 "but should be %u bytes\n",
2080 (unsigned)indata.dsize,
2081 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2085 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2088 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
  called when releaseip event finishes for del_public_address

  status is ignored; private_data is a talloc context that is freed
  here.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
				void *private_data)
{
	talloc_free(private_data);
}
2104 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2106 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2107 struct ctdb_vnn *vnn;
2110 /* verify the size of indata */
2111 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2112 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2116 ( offsetof(struct ctdb_control_ip_iface, iface)
2119 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2120 "but should be %u bytes\n",
2121 (unsigned)indata.dsize,
2122 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2126 /* walk over all public addresses until we find a match */
2127 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2128 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2129 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2131 DLIST_REMOVE(ctdb->vnn, vnn);
2133 ret = ctdb_event_script_callback(ctdb,
2134 mem_ctx, delete_ip_callback, mem_ctx,
2136 CTDB_EVENT_RELEASE_IP,
2139 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2140 vnn->public_netmask_bits);