4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout handed to every release/takeover control sent by ctdb_takeover_run.
   Expands to an expression that uses a variable named `ctdb`, which must be
   in scope wherever the macro is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument of the timeval_current_ofs() delay used when
   ctdb_control_send_arp re-arms its timer. */
33 #define CTDB_ARP_INTERVAL 1
/* Value that ctdb_control_send_arp compares arp->count against. */
34 #define CTDB_ARP_REPEAT 3
/* State handed to ctdb_control_send_arp as its private_data; allocated by
   takeover_ip_callback on vnn->takeover_ctx. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
/* connections to tickle after each ARP; taken from vnn->tcp_array by
   takeover_ip_callback */
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked list node wrapping one tcp connection; ctdb_control_tcp_client
   links these onto client->tcp_list. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Doubly linked list node recording one client registration;
   ctdb_control_tcp_client links these onto ctdb->client_ip_list and
   ctdb_client_ip_destructor unlinks them. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/*
  Timed-event handler. private_data must be a struct ctdb_takeover_arp.

  Sends an ARP for arp->addr on arp->vnn->iface, then walks arp->tcparray and
  sends a tcp tickle ack for each connection, logging a failure of either step.
  The handler re-arms itself on arp->vnn->takeover_ctx with a
  CTDB_ARP_INTERVAL-based delay; the arp->count == CTDB_ARP_REPEAT test is
  presumably what stops the repetition (that branch's body is not visible
  here -- TODO confirm).
*/
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
/* State shared between the takeover/release control handlers and their
   event-script completion callbacks (takeover_ip_callback,
   release_ip_callback). */
113 struct takeover_callback_state {
114 struct ctdb_req_control *c; /* pending control, answered via ctdb_request_control_reply */
115 ctdb_sock_addr *addr; /* copy of the public address being taken over or released */
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/*
  Completion callback registered by ctdb_control_takeover_ip.
  private_data must be a struct takeover_callback_state.

  On the path that logs "Failed to takeover IP" the pending control is
  answered with the script's status. Otherwise the callback makes sure
  vnn->takeover_ctx exists, allocates a zeroed ctdb_takeover_arp on it, moves
  vnn->tcp_array into that structure (the vnn's pointer is cleared and
  tcp_update_needed is set), schedules ctdb_control_send_arp with a zero
  timeout and answers the control with 0. The final reply with -1 is the
  target of the `goto failed` used when allocation fails.
*/
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(state->vnn);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn of the node that has a public ip address
179 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each entry's public_address with
   addr using ctdb_same_ip. */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
/*
  Control handler: take over the public address carried in indata, which is
  interpreted as a struct ctdb_public_ip.

  Looks the address up with find_public_ip_vnn (an unknown address is only
  logged at DEBUG_INFO), checks whether the kernel already has the address,
  and otherwise starts an event script through ctdb_event_script_callback
  with takeover_ip_callback as the completion handler. In that case the reply
  to the control is sent asynchronously by the callback.
*/
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(vnn, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
/* NOTE(review): c and addr are parented to ctdb here, whereas
   ctdb_control_release_ip parents both to state -- confirm they are
   released when state is freed. */
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 timeval_set(ctdb->tunable.script_timeout, 0),
239 state, takeover_ip_callback, state,
242 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243 vnn->public_netmask_bits);
246 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247 ctdb_addr_to_str(&pip->addr),
253 /* tell ctdb_control.c that we will be replying asynchronously */
260 takeover an ip address old v4 style
/*
  Compatibility entry point for the old ipv4-only control: copies indata into
  a zeroed struct ctdb_public_ip allocated on c and forwards the result to
  ctdb_control_takeover_ip.
*/
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
263 struct ctdb_req_control *c,
269 data.dsize = sizeof(struct ctdb_public_ip);
270 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): copies indata.dsize bytes into a buffer of
   sizeof(struct ctdb_public_ip); no bound check is visible here -- confirm
   the caller validates indata.dsize. */
273 memcpy(data.dptr, indata.dptr, indata.dsize);
274 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
278 kill any clients that are registered with an IP that is being released
/*
  Walks ctdb->client_ip_list; for every entry whose address matches addr
  (ctdb_same_ip) the owning struct ctdb_client is looked up with
  ctdb_reqid_find and, when its pid is non-zero, that process is sent SIGKILL.
*/
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 struct ctdb_client_ip *ip;
284 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285 ctdb_addr_to_str(addr)));
287 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288 ctdb_sock_addr tmp_addr;
291 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
293 ctdb_addr_to_str(&ip->addr)));
295 if (ctdb_same_ip(&tmp_addr, addr)) {
296 struct ctdb_client *client = ctdb_reqid_find(ctdb,
299 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
301 ctdb_addr_to_str(&ip->addr),
304 if (client->pid != 0) {
305 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306 (unsigned)client->pid,
307 ctdb_addr_to_str(addr),
309 kill(client->pid, SIGKILL);
316 called when releaseip event finishes
/*
  Completion callback registered by ctdb_control_release_ip.
  private_data must be a struct takeover_callback_state.

  Sends a CTDB_SRVID_RELEASE_IP message whose payload is the NUL-terminated
  string form of state->addr to this node (ctdb->pnn), kills the clients
  registered for the address via release_kill_clients, and answers the
  pending control with 0.
*/
318 static void release_ip_callback(struct ctdb_context *ctdb, int status,
321 struct takeover_callback_state *state =
322 talloc_get_type(private_data, struct takeover_callback_state);
325 /* send a message to all clients of this node telling them
326 that the cluster has been reconfigured and they should
327 release any sockets on this IP */
328 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
330 data.dsize = strlen((char *)data.dptr)+1;
332 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
334 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
336 /* kill clients that have registered with this IP */
337 release_kill_clients(ctdb, state->addr);
339 /* the control succeeded */
340 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
345 release an ip address
/*
  Control handler: release the public address carried in indata, which is
  interpreted as a struct ctdb_public_ip.

  Looks the address up with find_public_ip_vnn (an unknown address is only
  logged at DEBUG_INFO), frees vnn->takeover_ctx so that ARP timers allocated
  on it stop, and logs a "Redundant release" when the kernel does not hold
  the address. Otherwise it allocates the callback state (c and the address
  copy are parented to state), starts the "releaseip" event script with
  release_ip_callback as the completion handler, and the reply is sent
  asynchronously by that callback.
*/
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
348 struct ctdb_req_control *c,
353 struct takeover_callback_state *state;
354 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355 struct ctdb_vnn *vnn;
357 /* update our vnn list */
358 vnn = find_public_ip_vnn(ctdb, &pip->addr);
360 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
361 ctdb_addr_to_str(&pip->addr)));
366 /* stop any previous arps */
367 talloc_free(vnn->takeover_ctx);
368 vnn->takeover_ctx = NULL;
370 if (!ctdb_sys_have_ip(&pip->addr)) {
371 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
372 ctdb_addr_to_str(&pip->addr),
373 vnn->public_netmask_bits,
378 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
379 ctdb_addr_to_str(&pip->addr),
380 vnn->public_netmask_bits,
384 state = talloc(ctdb, struct takeover_callback_state);
385 CTDB_NO_MEMORY(ctdb, state);
387 state->c = talloc_steal(state, c);
388 state->addr = talloc(state, ctdb_sock_addr);
389 CTDB_NO_MEMORY(ctdb, state->addr);
390 *state->addr = pip->addr;
393 ret = ctdb_event_script_callback(ctdb,
394 timeval_set(ctdb->tunable.script_timeout, 0),
395 state, release_ip_callback, state,
396 "releaseip %s %s %u",
398 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
399 vnn->public_netmask_bits);
401 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
402 ctdb_addr_to_str(&pip->addr),
408 /* tell the control that we will be reply asynchronously */
414 release an ip address old v4 style
/*
  Compatibility entry point for the old ipv4-only control: copies indata into
  a zeroed struct ctdb_public_ip allocated on c and forwards the result to
  ctdb_control_release_ip.
*/
416 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
417 struct ctdb_req_control *c,
423 data.dsize = sizeof(struct ctdb_public_ip);
424 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
425 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): copies indata.dsize bytes into a buffer of
   sizeof(struct ctdb_public_ip); no bound check is visible here -- confirm
   the caller validates indata.dsize. */
427 memcpy(data.dptr, indata.dptr, indata.dsize);
428 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Registers one public address on the ctdb->vnn list.

  An address that already appears on the list (ctdb_same_sockaddr) is
  reported at DEBUG_CRIT. Otherwise a zeroed struct ctdb_vnn is allocated on
  ctdb, filled with a talloc'd copy of iface, a copy of *addr and the netmask
  bits, and linked onto ctdb->vnn with DLIST_ADD.
*/
432 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
434 struct ctdb_vnn *vnn;
436 /* Verify that we dont have an entry for this ip yet */
437 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
438 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
439 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
440 ctdb_addr_to_str(addr)));
445 /* create a new vnn structure for this ip address */
446 vnn = talloc_zero(ctdb, struct ctdb_vnn);
447 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
448 vnn->iface = talloc_strdup(vnn, iface);
449 CTDB_NO_MEMORY(ctdb, vnn->iface);
450 vnn->public_address = *addr;
451 vnn->public_netmask_bits = mask;
454 DLIST_ADD(ctdb->vnn, vnn);
461 setup the event script directory
/* Stores a talloc'd copy of script_dir (owned by ctdb) in
   ctdb->event_script_dir; allocation failure is handled by CTDB_NO_MEMORY. */
463 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
465 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
466 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
471 setup the public address lists from a file
/*
  Loads the public address list from the file named by alist.

  The lines are read with file_lines_load (allocated on ctdb) and trailing
  empty lines are trimmed. For each remaining line, leading spaces and tabs
  are skipped, empty lines are tested for, and the line is split on
  space/tab with strtok. When no interface token is present,
  ctdb->default_public_interface is used; a missing default is reported at
  DEBUG_CRIT. The address is parsed with parse_ip_mask and registered through
  ctdb_add_public_address; parse or registration failures are logged with
  the 1-based line number.
*/
473 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
479 lines = file_lines_load(alist, &nlines, ctdb);
481 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
484 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
488 for (i=0;i<nlines;i++) {
496 while ((*line == ' ') || (*line == '\t')) {
502 if (strcmp(line, "") == 0) {
505 tok = strtok(line, " \t");
507 tok = strtok(NULL, " \t");
509 if (NULL == ctdb->default_public_interface) {
510 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
515 iface = ctdb->default_public_interface;
520 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
521 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
525 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
526 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked list node of the merged, cluster-wide public ip list that
   create_merged_ip_list builds and ctdb_takeover_run redistributes. */
539 struct ctdb_public_ip_list {
540 struct ctdb_public_ip_list *next;
546 /* Given a physical node, return the number of
547 public addresses that are currently assigned to this node.
/* Walks the ips list and tests each entry's pnn against the requested pnn. */
549 static int node_ip_coverage(struct ctdb_context *ctdb,
551 struct ctdb_public_ip_list *ips)
555 for (;ips;ips=ips->next) {
556 if (ips->pnn == pnn) {
564 /* Check if this is a public ip known to the node, i.e. can that
565 node takeover this ip ?
/* Searches ctdb->nodes[pnn]->public_ips for ip->addr (ctdb_same_ip); a node
   without a public_ips list is handled separately. Callers in this file
   treat a non-zero result as "this node cannot serve the ip". */
567 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
568 struct ctdb_public_ip_list *ip)
570 struct ctdb_all_public_ips *public_ips;
573 public_ips = ctdb->nodes[pnn]->public_ips;
575 if (public_ips == NULL) {
579 for (i=0;i<public_ips->num;i++) {
580 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
581 /* yes, this node can serve this public ip */
590 /* search the node list for a node to take over this ip.
591 pick the node that is currently serving the fewest ips
592 so that the ips get spread out evenly.
/*
  Scans nodemap for a node to take over ip. Nodes whose flags intersect mask,
  and nodes for which can_node_serve_ip returns non-zero, are skipped; for
  the remaining candidates node_ip_coverage reports how many entries of
  all_ips they already hold. A warning is logged when no node is found.
  ctdb_takeover_run treats a non-zero return as failure.
*/
594 static int find_takeover_node(struct ctdb_context *ctdb,
595 struct ctdb_node_map *nodemap, uint32_t mask,
596 struct ctdb_public_ip_list *ip,
597 struct ctdb_public_ip_list *all_ips)
603 for (i=0;i<nodemap->num;i++) {
604 if (nodemap->nodes[i].flags & mask) {
605 /* This node is not healthy and can not be used to serve
611 /* verify that this node can serve this ip */
612 if (can_node_serve_ip(ctdb, i, ip)) {
613 /* no it couldnt so skip to the next node */
617 num = node_ip_coverage(ctdb, i, all_ips);
618 /* was this the first node we checked ? */
630 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
631 ctdb_addr_to_str(&ip->addr)));
/*
  Adds ip to the merged list ip_list unless an entry with the same address
  (ctdb_same_ip) is already present. A new entry is allocated zeroed on
  tmp_ctx, receives ip's pnn and address, and is linked in front of ip_list.
  create_merged_ip_list stores the return value as the new list head.
*/
640 struct ctdb_public_ip_list *
641 add_ip_to_merged_list(struct ctdb_context *ctdb,
643 struct ctdb_public_ip_list *ip_list,
644 struct ctdb_public_ip *ip)
646 struct ctdb_public_ip_list *tmp_ip;
648 /* do we already have this ip in our merged list ?*/
649 for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
651 /* we already have this public ip in the list */
652 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
657 /* this is a new public ip, we must add it to the list */
658 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
659 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
660 tmp_ip->pnn = ip->pnn;
661 tmp_ip->addr = ip->addr;
662 tmp_ip->next = ip_list;
/*
  Builds the cluster-wide public ip list: iterates over ctdb->nodes, tests
  each node for NODE_FLAGS_DELETED and for a missing public_ips list, and
  feeds every public ip of the remaining nodes to add_ip_to_merged_list.
  List entries are allocated on tmp_ctx.
*/
667 struct ctdb_public_ip_list *
668 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
671 struct ctdb_public_ip_list *ip_list = NULL;
672 struct ctdb_all_public_ips *public_ips;
674 for (i=0;i<ctdb->num_nodes;i++) {
675 public_ips = ctdb->nodes[i]->public_ips;
677 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
681 /* there were no public ips for this node */
682 if (public_ips == NULL) {
686 for (j=0;j<public_ips->num;j++) {
687 ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
688 ip_list, &public_ips->ips[j]);
696 make any IP alias changes for public addresses that are necessary
/*
  Recomputes which node serves each public address and pushes the result to
  the cluster. All temporary data lives on tmp_ctx, which is freed on the
  visible error paths and at the end.

  Visible phases:
   1. Count nodes that are neither INACTIVE nor DISABLED and choose the
      exclusion mask; disabled nodes are only considered when no fully
      healthy node exists.
   2. Build the merged address list with create_merged_ip_list.
   3. When tunable deterministic_public_ips is 1, reset every assignment to
      (list index % nodemap->num).
      NOTE(review): assumes nodemap->num is non-zero -- confirm.
   4. Examine addresses whose node is masked out or cannot serve them
      (can_node_serve_ip), then use find_takeover_node for addresses whose
      pnn is -1.
   5. Rebalancing: compare per-node coverage (node_ip_coverage) for each
      address; no_ip_failback and deterministic_public_ips are both checked
      around this step and their combination is reported as an error.
   6. Send CTDB_CONTROL_RELEASE_IPv4 / CTDB_CONTROL_RELEASE_IP to every node
      that is not DISCONNECTED, for each address that node is not supposed
      to serve, and wait for the replies.
   7. Send CTDB_CONTROL_TAKEOVER_IPv4 / CTDB_CONTROL_TAKEOVER_IP for each
      assigned address to its owning node and wait for the replies.
  AF_INET addresses use the old ipv4 control layout; everything else uses
  struct ctdb_public_ip. Each control uses TAKEOVER_TIMEOUT().
*/
698 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
700 int i, num_healthy, retries;
701 struct ctdb_public_ip ip;
702 struct ctdb_public_ipv4 ipv4;
704 struct ctdb_public_ip_list *all_ips, *tmp_ip;
705 int maxnode, maxnum=0, minnode, minnum=0, num;
707 struct timeval timeout;
708 struct client_async_data *async_data;
709 struct ctdb_client_control_state *state;
710 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
715 /* Count how many completely healthy nodes we have */
717 for (i=0;i<nodemap->num;i++) {
718 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
723 if (num_healthy > 0) {
724 /* We have healthy nodes, so only consider them for
725 serving public addresses
727 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
729 /* We didnt have any completely healthy nodes so
730 use "disabled" nodes as a fallback
732 mask = NODE_FLAGS_INACTIVE;
735 /* since nodes only know about those public addresses that
736 can be served by that particular node, no single node has
737 a full list of all public addresses that exist in the cluster.
738 Walk over all node structures and create a merged list of
739 all public addresses that exist in the cluster.
741 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
743 /* If we want deterministic ip allocations, i.e. that the ip addresses
744 will always be allocated the same way for a specific set of
745 available/unavailable nodes.
747 if (1 == ctdb->tunable.deterministic_public_ips) {
748 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
749 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
750 tmp_ip->pnn = i%nodemap->num;
755 /* mark all public addresses with a masked node as being served by
758 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
759 if (tmp_ip->pnn == -1) {
762 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
767 /* verify that the assigned nodes can serve that public ip
768 and set it to -1 if not
770 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
771 if (tmp_ip->pnn == -1) {
774 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
775 /* this node can not serve this ip. */
781 /* now we must redistribute all public addresses with takeover node
782 -1 among the nodes available
786 /* loop over all ip's and find a physical node to cover for
789 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
790 if (tmp_ip->pnn == -1) {
791 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
792 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
793 ctdb_addr_to_str(&tmp_ip->addr)));
798 /* If we dont want ips to fail back after a node becomes healthy
799 again, we wont even try to reallocate the ip addresses so that
800 they are evenly spread out.
801 This can NOT be used at the same time as DeterministicIPs !
803 if (1 == ctdb->tunable.no_ip_failback) {
804 if (1 == ctdb->tunable.deterministic_public_ips) {
805 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
811 /* now, try to make sure the ip addresses are evenly distributed
813 for each ip address, loop over all nodes that can serve this
814 ip and make sure that the difference between the node
815 serving the most and the node serving the least ip's are not greater
818 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
819 if (tmp_ip->pnn == -1) {
823 /* Get the highest and lowest number of ips's served by any
824 valid node which can serve this ip.
828 for (i=0;i<nodemap->num;i++) {
829 if (nodemap->nodes[i].flags & mask) {
833 /* only check nodes that can actually serve this ip */
834 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
835 /* no it couldnt so skip to the next node */
839 num = node_ip_coverage(ctdb, i, all_ips);
860 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
861 ctdb_addr_to_str(&tmp_ip->addr)));
866 /* If we want deterministic IPs then dont try to reallocate
867 them to spread out the load.
869 if (1 == ctdb->tunable.deterministic_public_ips) {
873 /* if the spread between the smallest and largest coverage by
874 a node is >=2 we steal one of the ips from the node with
875 most coverage to even things out a bit.
876 try to do this at most 5 times since we dont want to spend
877 too much time balancing the ip coverage.
879 if ( (maxnum > minnum+1)
881 struct ctdb_public_ip_list *tmp;
883 /* mark one of maxnode's vnn's as unassigned and try
886 for (tmp=all_ips;tmp;tmp=tmp->next) {
887 if (tmp->pnn == maxnode) {
897 /* finished distributing the public addresses, now just send the
898 info out to the nodes
902 /* at this point ->pnn is the node which will own each IP
903 or -1 if there is no node that can cover this ip
906 /* now tell all nodes to delete any alias that they should not
907 have. This will be a NOOP on nodes that don't currently
908 hold the given alias */
909 async_data = talloc_zero(tmp_ctx, struct client_async_data);
910 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
912 for (i=0;i<nodemap->num;i++) {
913 /* don't talk to unconnected nodes, but do talk to banned nodes */
914 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
918 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
919 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
920 /* This node should be serving this
921 vnn so dont tell it to release the ip
925 if (tmp_ip->addr.sa.sa_family == AF_INET) {
926 ipv4.pnn = tmp_ip->pnn;
927 ipv4.sin = tmp_ip->addr.ip;
929 timeout = TAKEOVER_TIMEOUT();
930 data.dsize = sizeof(ipv4);
931 data.dptr = (uint8_t *)&ipv4;
932 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
933 0, CTDB_CONTROL_RELEASE_IPv4, 0,
937 ip.pnn = tmp_ip->pnn;
938 ip.addr = tmp_ip->addr;
940 timeout = TAKEOVER_TIMEOUT();
941 data.dsize = sizeof(ip);
942 data.dptr = (uint8_t *)&ip;
943 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
944 0, CTDB_CONTROL_RELEASE_IP, 0,
950 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
951 talloc_free(tmp_ctx);
955 ctdb_client_async_add(async_data, state);
958 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
959 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
960 talloc_free(tmp_ctx);
963 talloc_free(async_data);
966 /* tell all nodes to get their own IPs */
967 async_data = talloc_zero(tmp_ctx, struct client_async_data);
968 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
969 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
970 if (tmp_ip->pnn == -1) {
971 /* this IP won't be taken over */
975 if (tmp_ip->addr.sa.sa_family == AF_INET) {
976 ipv4.pnn = tmp_ip->pnn;
977 ipv4.sin = tmp_ip->addr.ip;
979 timeout = TAKEOVER_TIMEOUT();
980 data.dsize = sizeof(ipv4);
981 data.dptr = (uint8_t *)&ipv4;
982 state = ctdb_control_send(ctdb, tmp_ip->pnn,
983 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
987 ip.pnn = tmp_ip->pnn;
988 ip.addr = tmp_ip->addr;
990 timeout = TAKEOVER_TIMEOUT();
991 data.dsize = sizeof(ip);
992 data.dptr = (uint8_t *)&ip;
993 state = ctdb_control_send(ctdb, tmp_ip->pnn,
994 0, CTDB_CONTROL_TAKEOVER_IP, 0,
999 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1000 talloc_free(tmp_ctx);
1004 ctdb_client_async_add(async_data, state);
1006 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1007 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1008 talloc_free(tmp_ctx);
1012 talloc_free(tmp_ctx);
1018 destroy a ctdb_client_ip structure
/* talloc destructor installed by ctdb_control_tcp_client: logs the entry at
   DEBUG_DEBUG and unlinks it from ip->ctdb->client_ip_list. */
1020 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1022 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1023 ctdb_addr_to_str(&ip->addr),
1024 ntohs(ip->addr.ip.sin_port),
1027 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1032 called by a client to inform us of a TCP connection that it is managing
1033 that should be tickled with an ACK when IP takeover is done
1034 we handle both the old ipv4 style of packets as well as the new ipv4/6
/*
  Control handler: a local client registers a TCP connection that should be
  tickled after an IP takeover.

  indata must be sized either as struct ctdb_control_tcp (old ipv4 layout,
  converted here into a zeroed struct ctdb_control_tcp_addr) or as
  struct ctdb_control_tcp_addr; any other size is logged as invalid.
  Both endpoints are passed through ctdb_canonicalize_ip and the destination
  is looked up with find_public_ip_vnn. Addresses that are not public are
  logged per address family, and a registration for an address whose
  vnn->pnn differs from ctdb->pnn is logged as an attempt to register for an
  IP this node does not hold.

  On success the handler
   - allocates a ctdb_client_ip on client, installs
     ctdb_client_ip_destructor and links it onto ctdb->client_ip_list;
   - allocates a ctdb_tcp_list on client holding the src/dest pair and
     links it onto client->tcp_list;
   - broadcasts CTDB_CONTROL_TCP_ADD with the src/dest pair to
     CTDB_BROADCAST_CONNECTED using CTDB_CTRL_FLAG_NOREPLY.

  NOTE(review): client comes from ctdb_reqid_find and is dereferenced
  (client->pid, client->tcp_list); no NULL check is visible here -- confirm.
*/
1037 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1040 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1041 struct ctdb_control_tcp *old_addr = NULL;
1042 struct ctdb_control_tcp_addr new_addr;
1043 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1044 struct ctdb_tcp_list *tcp;
1045 struct ctdb_control_tcp_vnn t;
1048 struct ctdb_client_ip *ip;
1049 struct ctdb_vnn *vnn;
1050 ctdb_sock_addr addr;
1052 switch (indata.dsize) {
1053 case sizeof(struct ctdb_control_tcp):
1054 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1055 ZERO_STRUCT(new_addr);
1056 tcp_sock = &new_addr;
1057 tcp_sock->src.ip = old_addr->src;
1058 tcp_sock->dest.ip = old_addr->dest;
1060 case sizeof(struct ctdb_control_tcp_addr):
1061 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1064 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1065 "to ctdb_control_tcp_client. size was %d but "
1066 "only allowed sizes are %lu and %lu\n",
1068 (long unsigned)sizeof(struct ctdb_control_tcp),
1069 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1073 addr = tcp_sock->src;
1074 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1075 addr = tcp_sock->dest;
1076 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1079 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1080 vnn = find_public_ip_vnn(ctdb, &addr);
1082 switch (addr.sa.sa_family) {
1084 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1085 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1086 ctdb_addr_to_str(&addr)));
1090 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1091 ctdb_addr_to_str(&addr)));
1094 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1100 if (vnn->pnn != ctdb->pnn) {
1101 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1102 ctdb_addr_to_str(&addr),
1103 client_id, client->pid));
1104 /* failing this call will tell smbd to die */
1108 ip = talloc(client, struct ctdb_client_ip);
1109 CTDB_NO_MEMORY(ctdb, ip);
1113 ip->client_id = client_id;
1114 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1115 DLIST_ADD(ctdb->client_ip_list, ip);
1117 tcp = talloc(client, struct ctdb_tcp_list);
1118 CTDB_NO_MEMORY(ctdb, tcp);
1120 tcp->connection.src_addr = tcp_sock->src;
1121 tcp->connection.dst_addr = tcp_sock->dest;
1123 DLIST_ADD(client->tcp_list, tcp);
1125 t.src = tcp_sock->src;
1126 t.dest = tcp_sock->dest;
1128 data.dptr = (uint8_t *)&t;
1129 data.dsize = sizeof(t);
1131 switch (addr.sa.sa_family) {
1133 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1134 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1135 ctdb_addr_to_str(&tcp_sock->src),
1136 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1139 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1140 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1141 ctdb_addr_to_str(&tcp_sock->src),
1142 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1145 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1149 /* tell all nodes about this tcp connection */
1150 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1151 CTDB_CONTROL_TCP_ADD,
1152 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1154 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1162 find a tcp address on a list
/* Linear search of array for a connection whose source and destination both
   equal those of tcp (ctdb_same_sockaddr); a match is returned as a pointer
   into array->connections. A NULL array is handled before the search. */
1164 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1165 struct ctdb_tcp_connection *tcp)
1169 if (array == NULL) {
1173 for (i=0;i<array->num;i++) {
1174 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1175 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1176 return &array->connections[i];
1183 called by a daemon to inform us of a TCP connection that one of its
1184 clients is managing and that should be tickled with an ACK when IP takeover is done
/*
  Control handler: records the connection p->src -> p->dest (indata is
  interpreted as a struct ctdb_control_tcp_vnn) in the tcp_array of the
  public address p->dest.

  A destination that is not a public address is logged at DEBUG_INFO. For the
  first tickle the array header is allocated on ctdb->nodes and its
  connections buffer on the array itself. Later tickles are first looked up
  with ctdb_tcp_find; a duplicate is only logged, otherwise the connections
  buffer is grown with talloc_realloc and the new pair is stored at index
  tcparray->num.

  NOTE(review): the talloc_realloc result is assigned straight to
  tcparray->connections before CTDB_NO_MEMORY checks it, so on allocation
  failure the previous buffer pointer is overwritten -- confirm acceptable.
*/
1187 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1189 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1190 struct ctdb_tcp_array *tcparray;
1191 struct ctdb_tcp_connection tcp;
1192 struct ctdb_vnn *vnn;
1194 vnn = find_public_ip_vnn(ctdb, &p->dest);
1196 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1197 ctdb_addr_to_str(&p->dest)));
1203 tcparray = vnn->tcp_array;
1205 /* If this is the first tickle */
1206 if (tcparray == NULL) {
1207 tcparray = talloc_size(ctdb->nodes,
1208 offsetof(struct ctdb_tcp_array, connections) +
1209 sizeof(struct ctdb_tcp_connection) * 1);
1210 CTDB_NO_MEMORY(ctdb, tcparray);
1211 vnn->tcp_array = tcparray;
1214 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1215 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1217 tcparray->connections[tcparray->num].src_addr = p->src;
1218 tcparray->connections[tcparray->num].dst_addr = p->dest;
1224 /* Do we already have this tickle ?*/
1225 tcp.src_addr = p->src;
1226 tcp.dst_addr = p->dest;
1227 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1228 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1229 ctdb_addr_to_str(&tcp.dst_addr),
1230 ntohs(tcp.dst_addr.ip.sin_port),
1235 /* A new tickle, we must add it to the array */
1236 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1237 struct ctdb_tcp_connection,
1239 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1241 vnn->tcp_array = tcparray;
1242 tcparray->connections[tcparray->num].src_addr = p->src;
1243 tcparray->connections[tcparray->num].dst_addr = p->dest;
1246 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1247 ctdb_addr_to_str(&tcp.dst_addr),
1248 ntohs(tcp.dst_addr.ip.sin_port),
1256 remove a TCP connection from the tickle array of the public address it
1257 was registered on, e.g. when the client that owned the connection goes away
/*
  Removes conn from the tcp_array of the public address conn->dst_addr.

  An unknown public address is logged at DEBUG_ERR; an empty array or an
  unknown connection is logged at DEBUG_INFO. A known connection is removed
  by overwriting its slot with the last array element and decrementing num;
  when num reaches 0 the whole array is freed and vnn->tcp_array is reset to
  NULL. vnn->tcp_update_needed is set afterwards.
*/
1260 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1262 struct ctdb_tcp_connection *tcpp;
1263 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1266 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1267 ctdb_addr_to_str(&conn->dst_addr)));
1271 /* if the array is empty we cant remove it
1272 and we dont need to do anything
1274 if (vnn->tcp_array == NULL) {
1275 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1276 ctdb_addr_to_str(&conn->dst_addr),
1277 ntohs(conn->dst_addr.ip.sin_port)));
1282 /* See if we know this connection
1283 if we dont know this connection then we dont need to do anything
1285 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1287 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1288 ctdb_addr_to_str(&conn->dst_addr),
1289 ntohs(conn->dst_addr.ip.sin_port)));
1294 /* We need to remove this entry from the array.
1295 Instead of allocating a new array and copying data to it
1296 we cheat and just copy the last entry in the existing array
1297 to the entry that is to be removed and just shring the
1300 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1301 vnn->tcp_array->num--;
1303 /* If we deleted the last entry we also need to remove the entire array
1305 if (vnn->tcp_array->num == 0) {
1306 talloc_free(vnn->tcp_array);
1307 vnn->tcp_array = NULL;
1310 vnn->tcp_update_needed = true;
1312 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1313 ctdb_addr_to_str(&conn->src_addr),
1314 ntohs(conn->src_addr.ip.sin_port)));
1319 called when a daemon restarts - send all tickles for all public addresses
1320 we are serving immediately to the new node.
/* Control handler for the startup notification of node vnn. The only visible
   content is a TODO: sending the tickles is not implemented here. */
1322 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1324 /*XXX here we should send all tickles we are serving to the new node */
1330 called when a client structure goes away - hook to remove
1331 elements from the tcp_list in all daemons
/* Drains client->tcp_list: each entry is unlinked from the list and its
   connection is passed to ctdb_remove_tcp_connection. */
1333 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1335 while (client->tcp_list) {
1336 struct ctdb_tcp_list *tcp = client->tcp_list;
1337 DLIST_REMOVE(client->tcp_list, tcp);
1338 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1344 release all IPs on shutdown
/*
  Walks ctdb->vnn and checks, for each public address, whether the kernel
  holds it (ctdb_sys_have_ip) and whether vnn->pnn equals ctdb->pnn. For the
  addresses it releases, it runs the "releaseip" event script through
  ctdb_event_script and then kills the registered clients with
  release_kill_clients.

  NOTE(review): the address string passed to the script is talloc_strdup'd
  on ctdb and no matching free is visible here -- confirm.
*/
1346 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1348 struct ctdb_vnn *vnn;
1350 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1351 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1354 if (vnn->pnn == ctdb->pnn) {
1357 ctdb_event_script(ctdb, "releaseip %s %s %u",
1359 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1360 vnn->public_netmask_bits);
1361 release_kill_clients(ctdb, &vnn->public_address);
1367 get list of public IPs
/*
  Control handler: returns every entry of ctdb->vnn as a
  struct ctdb_all_public_ips in *outdata.

  The list is walked once to count the entries, a zeroed buffer of
  offsetof(ips) + num * sizeof(struct ctdb_public_ip) bytes is allocated on
  outdata, and a second walk stores each vnn's pnn and public_address.
*/
1369 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1370 struct ctdb_req_control *c, TDB_DATA *outdata)
1373 struct ctdb_all_public_ips *ips;
1374 struct ctdb_vnn *vnn;
1376 /* count how many public ip structures we have */
1378 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1382 len = offsetof(struct ctdb_all_public_ips, ips) +
1383 num*sizeof(struct ctdb_public_ip);
1384 ips = talloc_zero_size(outdata, len);
1385 CTDB_NO_MEMORY(ctdb, ips);
1387 outdata->dsize = len;
1388 outdata->dptr = (uint8_t *)ips;
1392 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1393 ips->ips[i].pnn = vnn->pnn;
1394 ips->ips[i].addr = vnn->public_address;
1403 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Old-style variant of the public address listing that uses the ipv4
  reply layout (struct ctdb_all_public_ipsv4).
  ctdb:    daemon context; ctdb->vnn is the list that is walked.
  c:       the request; not referenced by any visible statement.
  outdata: receives the reply.  dptr is a zero-initialised buffer
           allocated as a talloc child of outdata, dsize its length.
  Both passes over the list test the address family against AF_INET;
  the bodies of those tests are not visible, presumably they skip
  entries that are not ipv4 - TODO confirm.
  For each copied entry the pnn and the sockaddr_in view of the
  address (public_address.ip) are stored.
*/
1405 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1406 struct ctdb_req_control *c, TDB_DATA *outdata)
1409 struct ctdb_all_public_ipsv4 *ips;
1410 struct ctdb_vnn *vnn;
1412 /* count how many public ip structures we have */
1414 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1415 if (vnn->public_address.sa.sa_family != AF_INET) {
/* fixed part up to the ips[] member plus one ctdb_public_ipv4 per entry */
1421 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1422 num*sizeof(struct ctdb_public_ipv4);
1423 ips = talloc_zero_size(outdata, len);
1424 CTDB_NO_MEMORY(ctdb, ips);
1426 outdata->dsize = len;
1427 outdata->dptr = (uint8_t *)ips;
/* second pass: same family test as the counting pass, then copy */
1431 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1432 if (vnn->public_address.sa.sa_family != AF_INET) {
1435 ips->ips[i].pnn = vnn->pnn;
1436 ips->ips[i].sin = vnn->public_address.ip;
1445 structure containing the listening socket and the list of tcp connections
1446 that the ctdb daemon is to kill
/*
  Per public address state for killing tcp connections.
  One instance hangs off vnn->killtcp and is allocated on first use by
  ctdb_killtcp_add_connection().  Code elsewhere in this file also uses
  capture_fd and private_data members of this structure.
*/
1448 struct ctdb_kill_tcp {
1449 struct ctdb_vnn *vnn; /* owner; its killtcp pointer is cleared by the destructor */
1450 struct ctdb_context *ctdb; /* daemon context, used to reach the event context */
1452 struct fd_event *fde; /* read event on the capture socket, NULL until set up */
1453 trbt_tree_t *connections; /* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
1458 a tcp connection that is to be killed
/*
  One connection scheduled to be reset; stored in the connections tree
  of a struct ctdb_kill_tcp.  Code elsewhere in this file also uses a
  count member (number of tickle attempts so far).
*/
1460 struct ctdb_killtcp_con {
1461 ctdb_sock_addr src_addr; /* canonicalised source endpoint */
1462 ctdb_sock_addr dst_addr; /* canonicalised destination endpoint */
1464 struct ctdb_kill_tcp *killtcp; /* back pointer to the owning killtcp state */
1467 /* this function is used to create a key to represent this socketpair
1468 in the killtcp tree.
1469 this key is used to insert and lookup matching socketpairs that are
1470 to be tickled and RST
/* number of 32 bit words in a killtcp tree key (enough for the ipv6 layout) */
1472 #define KILLTCP_KEYLEN 10
/*
  Build the tree key for a socket pair.
  src, dst: the two endpoints; they must be of the same address family,
            otherwise an error is logged.
  Returns (return statements are not visible; presumably always) a
  pointer to a function-static array of KILLTCP_KEYLEN words.  The
  array is zeroed at the start of every call, so the result is only
  valid until the next call and the function is not reentrant.
  Key layout, values stored exactly as held in the sockaddr (no byte
  order conversion is done here):
    four-word layout (sockaddr_in members; the case labels are not
    visible, presumably AF_INET):
      [0] dst address  [1] src address  [2] dst port  [3] src port
      remaining words stay zero
    ten-word layout (sockaddr_in6 members; presumably AF_INET6):
      [0..7] the four 32 bit address words, from index 3 down to 0,
             dst and src interleaved
      [8] dst port  [9] src port
  Any other family is logged as an error.
*/
1473 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1475 static uint32_t key[KILLTCP_KEYLEN];
1477 bzero(key, sizeof(key));
1479 if (src->sa.sa_family != dst->sa.sa_family) {
1480 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1484 switch (src->sa.sa_family) {
1486 key[0] = dst->ip.sin_addr.s_addr;
1487 key[1] = src->ip.sin_addr.s_addr;
1488 key[2] = dst->ip.sin_port;
1489 key[3] = src->ip.sin_port;
1492 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1493 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1494 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1495 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1496 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1497 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1498 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1499 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1500 key[8] = dst->ip6.sin6_port;
1501 key[9] = src->ip6.sin6_port;
1504 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1512 called when we get a read event on the raw socket
/*
  fd event callback for the capture socket of a killtcp structure.
  ev, fde:      event context and fd event; not referenced by any
                visible statement.
  flags:        event flags; only EVENT_FD_READ is of interest.
  private_data: the struct ctdb_kill_tcp this socket belongs to.
  Steps visible here:
    - reads one packet with ctdb_sys_read_tcp_packet(), which yields
      the ack and sequence numbers (the argument line that passes
      src and dst is not visible; they are used for the lookup below),
    - looks the socket pair up in killtcp->connections using
      killtcp_key(&src, &dst),
    - for a known connection logs the event and calls
      ctdb_sys_send_tcp() from the stored dst_addr to the stored
      src_addr with the captured ack/seq numbers and a final argument
      of 1 (presumably the RST flag, matching the log text - confirm).
  The removal of the connection from the tree mentioned in the comment
  below is not visible.
  Note that the log statement reads the ports through the sockaddr_in
  view (ip.sin_port) for every address family.
*/
1514 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1515 uint16_t flags, void *private_data)
1517 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1518 struct ctdb_killtcp_con *con;
1519 ctdb_sock_addr src, dst;
1520 uint32_t ack_seq, seq;
1522 if (!(flags & EVENT_FD_READ)) {
1526 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1527 killtcp->private_data,
1529 &ack_seq, &seq) != 0) {
1530 /* probably a non-tcp ACK packet */
1534 /* check if we have this guy in our list of connections
1537 con = trbt_lookuparray32(killtcp->connections,
1538 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1540 /* no this was some other packet we can just ignore */
1544 /* This one has been tickled !
1545 now reset him and remove him from the list.
1547 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1548 ntohs(con->dst_addr.ip.sin_port),
1549 ctdb_addr_to_str(&con->src_addr),
1550 ntohs(con->src_addr.ip.sin_port)));
1552 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1557 /* when traversing the list of all tcp connections to send tickle acks to
1558 (so that we can capture the ack coming back and kill the connection
1560 this callback is called for each connection we are currently trying to kill
/*
  Tree traversal callback, run once per connection we are trying to kill.
  param: not referenced by any visible statement (the caller passes NULL).
  data:  the struct ctdb_killtcp_con stored in the tree.
  A connection whose count has reached 5 is given up on (the action
  taken is not visible).  Otherwise another tickle is sent using the
  stored dst_addr and src_addr; the name of the called function is on
  a line that is not visible - presumably ctdb_sys_send_tcp(), confirm.
*/
1562 static void tickle_connection_traverse(void *param, void *data)
1564 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1566 /* have tried too many times, just give up */
1567 if (con->count >= 5) {
1572 /* otherwise, try tickling it again */
1575 (ctdb_sock_addr *)&con->dst_addr,
1576 (ctdb_sock_addr *)&con->src_addr,
1582 called every second until all sentenced connections have been reset
/*
  Timed event callback that keeps tickling the connections queued for
  termination.
  ev, te, t:    event bookkeeping; not referenced by any visible
                statement.
  private_data: the struct ctdb_kill_tcp to work on.
  Every run traverses killtcp->connections with
  tickle_connection_traverse().  When the tree pointer or its root is
  NULL afterwards the whole killtcp structure is freed with
  talloc_free() (which runs the destructor registered for it).
  Otherwise the callback re-arms itself one second later; the timer is
  a talloc child of killtcp, so it goes away together with it.
*/
1584 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1585 struct timeval t, void *private_data)
1587 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1590 /* loop over all connections sending tickle ACKs */
1591 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1594 /* If there are no more connections to kill we can remove the
1595 entire killtcp structure
1597 if ( (killtcp->connections == NULL) ||
1598 (killtcp->connections->root == NULL) ) {
1599 talloc_free(killtcp);
1603 /* try tickling them again in a seconds time
1605 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1606 ctdb_tickle_sentenced_connections, killtcp);
1610 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp (registered in
  ctdb_killtcp_add_connection()).
  killtcp: the structure being freed.
  Clears the back reference held by the owning vnn, so that the next
  connection to kill finds vnn->killtcp == NULL and allocates a fresh
  structure.  The return statement is not visible.
*/
1612 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1614 killtcp->vnn->killtcp = NULL;
1619 /* nothing fancy here, just unconditionally replace any existing
1620 connection structure with the new one.
1622 dont even free the old one if it did exist, that one is talloc_stolen
1623 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which passes the new connection
  structure as the callback argument.
  The body is not visible here; presumably parm is the new entry and
  data the entry already stored under the key, and the new one is
  returned to replace it - TODO confirm.
*/
1626 static void *add_killtcp_callback(void *parm, void *data)
1632 add a tcp socket to the list of connections we want to RST
/*
  Queue a tcp connection for termination.
  ctdb: daemon context.
  The remaining parameter lines are not visible; the body refers to
  them as s and d (source and destination address).
  Steps visible here:
    1. Both addresses are canonicalised into local copies src and dst.
    2. The owning vnn is looked up by dst, then by src (the test
       between the two lookups is not visible).  If the node has a
       single_ip_vnn whose public address equals dst, that vnn is
       used.  When nothing matches an error is logged.
    3. On first use for this vnn a struct ctdb_kill_tcp is allocated
       as a child of ctdb with capture_fd set to -1 and an empty
       connection tree, hooked into vnn->killtcp, and given
       ctdb_killtcp_destructor as destructor.
    4. A struct ctdb_killtcp_con is allocated as a child of killtcp and
       inserted into the tree.  The key is built as
       killtcp_key(dst_addr, src_addr), i.e. with the arguments swapped
       relative to the lookup in capture_tcp_handler() - presumably
       because the captured reply travels in the opposite direction;
       confirm.
    5. If no capture socket exists yet, one is opened on vnn->iface;
       failure is logged at critical level.
    6. If no fd event exists yet, a read event with auto-close is added
       for the capture socket, plus a timer that runs
       ctdb_tickle_sentenced_connections() after one second.
    7. The connection is tickled once immediately (call not visible).
  The last two visible lines free vnn->killtcp and clear the pointer;
  presumably this is the failure path - confirm.
*/
1634 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1638 ctdb_sock_addr src, dst;
1639 struct ctdb_kill_tcp *killtcp;
1640 struct ctdb_killtcp_con *con;
1641 struct ctdb_vnn *vnn;
1643 ctdb_canonicalize_ip(s, &src);
1644 ctdb_canonicalize_ip(d, &dst);
1646 vnn = find_public_ip_vnn(ctdb, &dst);
1648 vnn = find_public_ip_vnn(ctdb, &src);
1651 /* if it is not a public ip it could be our 'single ip' */
1652 if (ctdb->single_ip_vnn) {
1653 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1654 vnn = ctdb->single_ip_vnn;
1659 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1663 killtcp = vnn->killtcp;
1665 /* If this is the first connection to kill we must allocate
1668 if (killtcp == NULL) {
1669 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1670 CTDB_NO_MEMORY(ctdb, killtcp);
1673 killtcp->ctdb = ctdb;
1674 killtcp->capture_fd = -1;
1675 killtcp->connections = trbt_create(killtcp, 0);
1677 vnn->killtcp = killtcp;
1678 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1683 /* create a structure that describes this connection we want to
1684 RST and store it in killtcp->connections
1686 con = talloc(killtcp, struct ctdb_killtcp_con);
1687 CTDB_NO_MEMORY(ctdb, con);
1688 con->src_addr = src;
1689 con->dst_addr = dst;
1691 con->killtcp = killtcp;
1694 trbt_insertarray32_callback(killtcp->connections,
1695 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1696 add_killtcp_callback, con);
1699 If we dont have a socket to listen on yet we must create it
1701 if (killtcp->capture_fd == -1) {
1702 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1703 if (killtcp->capture_fd == -1) {
1704 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1710 if (killtcp->fde == NULL) {
1711 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1712 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1713 capture_tcp_handler, killtcp);
1715 /* We also need to set up some events to tickle all these connections
1716 until they are all reset
1718 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1719 ctdb_tickle_sentenced_connections, killtcp);
1722 /* tickle him once now */
1731 talloc_free(vnn->killtcp);
1732 vnn->killtcp = NULL;
1737 kill a TCP connection.
/*
  Control handler: kill one tcp connection.
  ctdb:   daemon context.
  indata: blob interpreted as struct ctdb_control_killtcp.
  Returns whatever ctdb_killtcp_add_connection() returns for the
  source/destination pair found in the blob.
  NOTE(review): no check of indata.dsize against the structure size is
  visible before the blob is dereferenced - confirm that the caller
  validates it.
*/
1739 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1741 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1743 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1747 called by a daemon to inform us of the entire list of TCP tickles for
1748 a particular public address.
1749 this control should only be sent by the node that is currently serving
1750 that public address.
/*
  Control handler: replace the stored tickle list of one public address
  with the list carried in the request.
  ctdb:   daemon context.
  indata: blob interpreted as struct ctdb_control_tcp_tickle_list.
  Validation is done in two stages: the blob must first be large enough
  to contain everything up to tickles.connections (so tickles.num can
  be read), and then large enough for tickles.num connection entries.
  An address that is not a public address is reported at INFO level.
  The previous array of the vnn is freed, a new struct ctdb_tcp_array
  is allocated under ctdb->nodes, filled with a copy of the received
  connections, and finally moved onto the vnn with talloc_steal().
  NOTE(review): the product sizeof(struct ctdb_tcp_connection) *
  tickles.num in the second size check is not guarded against
  wrap-around in the visible lines.
*/
1752 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1754 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1755 struct ctdb_tcp_array *tcparray;
1756 struct ctdb_vnn *vnn;
1758 /* We must at least have tickles.num or else we cant verify the size
1759 of the received data blob
1761 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1762 tickles.connections)) {
1763 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1767 /* verify that the size of data matches what we expect */
1768 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1769 tickles.connections)
1770 + sizeof(struct ctdb_tcp_connection)
1771 * list->tickles.num) {
1772 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1776 vnn = find_public_ip_vnn(ctdb, &list->addr);
1778 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1779 ctdb_addr_to_str(&list->addr)));
1784 /* remove any old ticklelist we might have */
1785 talloc_free(vnn->tcp_array);
1786 vnn->tcp_array = NULL;
/* built under ctdb->nodes first; only attached to the vnn once complete */
1788 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1789 CTDB_NO_MEMORY(ctdb, tcparray);
1791 tcparray->num = list->tickles.num;
1793 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1794 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1796 memcpy(tcparray->connections, &list->tickles.connections[0],
1797 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1799 /* We now have a new fresh tickle list array for this vnn */
1800 vnn->tcp_array = talloc_steal(vnn, tcparray);
1806 called to return the full list of tickles for the public address associated
1807 with the provided vnn
/*
  Control handler: return the stored tickle list of one public address.
  ctdb:    daemon context.
  indata:  blob interpreted as a ctdb_sock_addr naming the address.
  outdata: receives a struct ctdb_control_tcp_tickle_list allocated as
           a talloc child of outdata; dsize covers the fixed part up to
           tickles.connections plus num connection entries.
  An address that is not a public address is logged as an error.
  The connection count is taken from the vnn's tcp_array; the lines
  that guard against a missing array are not visible (presumably num is
  0 in that case - confirm).
  The reply buffer comes from talloc_size() and is therefore not
  zeroed; the visible lines fill in tickles.num and the connections.
  NOTE(review): no check of indata.dsize is visible before addr is
  used - confirm that the caller validates it.
*/
1809 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1811 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1812 struct ctdb_control_tcp_tickle_list *list;
1813 struct ctdb_tcp_array *tcparray;
1815 struct ctdb_vnn *vnn;
1817 vnn = find_public_ip_vnn(ctdb, addr);
1819 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1820 ctdb_addr_to_str(addr)));
1825 tcparray = vnn->tcp_array;
1827 num = tcparray->num;
/* fixed part up to tickles.connections plus one entry per connection */
1832 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1833 tickles.connections)
1834 + sizeof(struct ctdb_tcp_connection) * num;
1836 outdata->dptr = talloc_size(outdata, outdata->dsize);
1837 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1838 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1841 list->tickles.num = num;
1843 memcpy(&list->tickles.connections[0], tcparray->connections,
1844 sizeof(struct ctdb_tcp_connection) * num);
1852 set the list of all tcp tickles for a public address
/*
  Send the tickle list of one public address to the other nodes.
  ctdb:     daemon context.
  timeout:  not referenced by any visible statement.
  destnode: not referenced by any visible statement; the control is
            always sent to CTDB_BROADCAST_CONNECTED.
  addr:     the public address the list belongs to (the line that
            stores it in the request is not visible).
  tcparray: the connections to send; the lines guarding against a
            missing array are not visible.
  A struct ctdb_control_tcp_tickle_list sized for num connections is
  allocated on ctdb, filled in, sent as
  CTDB_CONTROL_SET_TCP_TICKLE_LIST with CTDB_CTRL_FLAG_NOREPLY, and
  freed again afterwards.  A failed send is logged as an error.
*/
1854 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1855 struct timeval timeout, uint32_t destnode,
1856 ctdb_sock_addr *addr,
1857 struct ctdb_tcp_array *tcparray)
1861 struct ctdb_control_tcp_tickle_list *list;
1864 num = tcparray->num;
1869 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1870 tickles.connections) +
1871 sizeof(struct ctdb_tcp_connection) * num;
1872 data.dptr = talloc_size(ctdb, data.dsize);
1873 CTDB_NO_MEMORY(ctdb, data.dptr);
1875 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1877 list->tickles.num = num;
1879 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
/* fire and forget: no reply is requested and no callback is registered */
1882 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1883 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1884 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1886 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1890 talloc_free(data.dptr);
1897 perform tickle updates if required
/*
  Periodic timed event: push changed tickle lists to the other nodes.
  ev, te, t:    event bookkeeping; not referenced by any visible
                statement.
  private_data: the struct ctdb_context.
  For every public address the visible code tests whether the address
  is assigned to this node (ctdb->pnn vs vnn->pnn) and whether
  tcp_update_needed is set; the bodies of both tests are not visible,
  presumably they skip the entry - confirm.  For the remaining entries
  ctdb_ctrl_set_tcp_tickles() is called and a failure is logged.
  Finally the callback re-arms itself after
  tunable.tickle_update_interval seconds, with the timer owned by
  ctdb->tickle_update_context.
*/
1899 static void ctdb_update_tcp_tickles(struct event_context *ev,
1900 struct timed_event *te,
1901 struct timeval t, void *private_data)
1903 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1905 struct ctdb_vnn *vnn;
1907 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1908 /* we only send out updates for public addresses that
1911 if (ctdb->pnn != vnn->pnn) {
1914 /* We only send out the updates if we need to */
1915 if (!vnn->tcp_update_needed) {
1918 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1920 CTDB_BROADCAST_CONNECTED,
1921 &vnn->public_address,
1924 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1925 ctdb_addr_to_str(&vnn->public_address)));
/* schedule the next run; this keeps the update periodic */
1929 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1930 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1931 ctdb_update_tcp_tickles, ctdb);
1936 start periodic update of tcp tickles
/*
  Start the periodic tickle list update.
  ctdb: daemon context.
  Creates ctdb->tickle_update_context as a talloc child of ctdb and
  schedules the first run of ctdb_update_tcp_tickles() after
  tunable.tickle_update_interval seconds.  The timer is owned by that
  context.
  NOTE(review): the result of talloc_new() is not checked in the
  visible lines.
*/
1938 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1940 ctdb->tickle_update_context = talloc_new(ctdb);
1942 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1943 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1944 ctdb_update_tcp_tickles, ctdb);
/*
  State for a series of gratuitous arps requested through a control.
  Code elsewhere in this file also uses iface (interface name) and
  count (repeat counter) members of this structure.
*/
1950 struct control_gratious_arp {
1951 struct ctdb_context *ctdb; /* daemon context, used to reach the event context */
1952 ctdb_sock_addr addr; /* address to announce */
1958 send a control_gratuitous arp
/*
  Timed event callback: send one gratuitous arp and schedule the next.
  ev, te, t:    event bookkeeping; not referenced by any visible
                statement.
  private_data: the struct control_gratious_arp describing the request.
  Sends an arp for arp->addr on arp->iface with ctdb_sys_send_arp() and
  logs a failure together with strerror(errno).  Once arp->count equals
  CTDB_ARP_REPEAT the series ends (the action taken is not visible;
  presumably arp is freed - confirm).  Otherwise the callback is
  scheduled again CTDB_ARP_INTERVAL seconds later, with the timer owned
  by arp.
*/
1960 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1961 struct timeval t, void *private_data)
1964 struct control_gratious_arp *arp = talloc_get_type(private_data,
1965 struct control_gratious_arp);
1967 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1969 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1974 if (arp->count == CTDB_ARP_REPEAT) {
1979 event_add_timed(arp->ctdb->ev, arp,
1980 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
1981 send_gratious_arp, arp);
/*
  Control handler: start a series of gratuitous arps for an address.
  ctdb:   daemon context.
  indata: blob interpreted as struct ctdb_control_gratious_arp, a fixed
          part followed by an interface name of gratious_arp->len bytes.
  The blob must be at least as large as the fixed part; a second check
  (its condition line is not visible) compares the size with the fixed
  part plus len.  Both failures are logged as errors.
  A struct control_gratious_arp is then allocated on ctdb, filled with
  a copy of the address and a talloc_strdup() of the interface name,
  and send_gratious_arp() is scheduled to run immediately
  (timeval_zero()).
  NOTE(review): talloc_strdup() relies on the interface name being NUL
  terminated inside the blob; no visible line verifies that.
*/
1988 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1990 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1991 struct control_gratious_arp *arp;
1993 /* verify the size of indata */
1994 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1995 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
1996 (unsigned)indata.dsize,
1997 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2001 ( offsetof(struct ctdb_control_gratious_arp, iface)
2002 + gratious_arp->len ) ){
2004 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2005 "but should be %u bytes\n",
2006 (unsigned)indata.dsize,
2007 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2012 arp = talloc(ctdb, struct control_gratious_arp);
2013 CTDB_NO_MEMORY(ctdb, arp);
2016 arp->addr = gratious_arp->addr;
/* the copy is a child of arp, so it is released together with it */
2017 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2018 CTDB_NO_MEMORY(ctdb, arp->iface);
2021 event_add_timed(arp->ctdb->ev, arp,
2022 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler: add a public address to this node.
  ctdb:   daemon context.
  indata: blob interpreted as struct ctdb_control_ip_iface, a fixed
          part followed by an interface name of pub->len bytes.
  The blob must be at least as large as the fixed part; a second check
  (its condition line is not visible) compares the size with the fixed
  part plus len.  Both failures are logged as errors.
  The address, mask and interface name are then passed to
  ctdb_add_public_address(); a failure of that call is logged.
*/
2027 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2029 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2032 /* verify the size of indata */
2033 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2034 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2038 ( offsetof(struct ctdb_control_ip_iface, iface)
2041 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2042 "but should be %u bytes\n",
2043 (unsigned)indata.dsize,
2044 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2048 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2051 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2059 called when releaseip event finishes for del_public_address
/*
  Completion callback for the "releaseip" event script started by
  ctdb_control_del_public_address().
  ctdb, status: not referenced by any visible statement; the script
                result is ignored here.
  The remaining parameter line is not visible; the body frees
  private_data, which the caller sets to the temporary talloc context
  it created for the script run.
*/
2061 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2064 talloc_free(private_data);
2067 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2069 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2070 struct ctdb_vnn *vnn;
2073 /* verify the size of indata */
2074 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2075 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2079 ( offsetof(struct ctdb_control_ip_iface, iface)
2082 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2083 "but should be %u bytes\n",
2084 (unsigned)indata.dsize,
2085 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2089 /* walk over all public addresses until we find a match */
2090 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2091 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2092 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2094 DLIST_REMOVE(ctdb->vnn, vnn);
2096 ret = ctdb_event_script_callback(ctdb,
2097 timeval_set(ctdb->tunable.script_timeout, 0),
2098 mem_ctx, delete_ip_callback, mem_ctx,
2099 "releaseip %s %s %u",
2101 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2102 vnn->public_netmask_bits);