4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
110 ctdb_control_send_arp, arp);
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(state->vnn);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn entry for a given public ip address
179 returns NULL if the address is not known as a public address
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(vnn, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 state, takeover_ip_callback, state,
243 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
244 vnn->public_netmask_bits);
247 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
248 ctdb_addr_to_str(&pip->addr),
254 /* tell ctdb_control.c that we will be replying asynchronously */
261 takeover an ip address old v4 style
263 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
264 struct ctdb_req_control *c,
270 data.dsize = sizeof(struct ctdb_public_ip);
271 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
272 CTDB_NO_MEMORY(ctdb, data.dptr);
274 memcpy(data.dptr, indata.dptr, indata.dsize);
275 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
279 kill any clients that are registered with an IP that is being released
281 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
283 struct ctdb_client_ip *ip;
285 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
286 ctdb_addr_to_str(addr)));
288 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
289 ctdb_sock_addr tmp_addr;
292 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
294 ctdb_addr_to_str(&ip->addr)));
296 if (ctdb_same_ip(&tmp_addr, addr)) {
297 struct ctdb_client *client = ctdb_reqid_find(ctdb,
300 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
302 ctdb_addr_to_str(&ip->addr),
305 if (client->pid != 0) {
306 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
307 (unsigned)client->pid,
308 ctdb_addr_to_str(addr),
310 kill(client->pid, SIGKILL);
317 called when releaseip event finishes
319 static void release_ip_callback(struct ctdb_context *ctdb, int status,
322 struct takeover_callback_state *state =
323 talloc_get_type(private_data, struct takeover_callback_state);
326 /* send a message to all clients of this node telling them
327 that the cluster has been reconfigured and they should
328 release any sockets on this IP */
329 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
330 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
331 data.dsize = strlen((char *)data.dptr)+1;
333 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
335 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
337 /* kill clients that have registered with this IP */
338 release_kill_clients(ctdb, state->addr);
340 /* the control succeeded */
341 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
346 release an ip address
348 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
349 struct ctdb_req_control *c,
354 struct takeover_callback_state *state;
355 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
356 struct ctdb_vnn *vnn;
358 /* update our vnn list */
359 vnn = find_public_ip_vnn(ctdb, &pip->addr);
361 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
362 ctdb_addr_to_str(&pip->addr)));
367 /* stop any previous arps */
368 talloc_free(vnn->takeover_ctx);
369 vnn->takeover_ctx = NULL;
371 if (!ctdb_sys_have_ip(&pip->addr)) {
372 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
373 ctdb_addr_to_str(&pip->addr),
374 vnn->public_netmask_bits,
379 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%u\n",
380 ctdb_addr_to_str(&pip->addr),
381 vnn->public_netmask_bits,
385 state = talloc(ctdb, struct takeover_callback_state);
386 CTDB_NO_MEMORY(ctdb, state);
388 state->c = talloc_steal(state, c);
389 state->addr = talloc(state, ctdb_sock_addr);
390 CTDB_NO_MEMORY(ctdb, state->addr);
391 *state->addr = pip->addr;
394 ret = ctdb_event_script_callback(ctdb,
395 state, release_ip_callback, state,
397 CTDB_EVENT_RELEASE_IP,
400 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
401 vnn->public_netmask_bits);
403 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
404 ctdb_addr_to_str(&pip->addr),
410 /* tell the control that we will be replying asynchronously */
416 release an ip address old v4 style
418 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
419 struct ctdb_req_control *c,
425 data.dsize = sizeof(struct ctdb_public_ip);
426 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
427 CTDB_NO_MEMORY(ctdb, data.dptr);
429 memcpy(data.dptr, indata.dptr, indata.dsize);
430 return ctdb_control_release_ip(ctdb, c, data, async_reply);
434 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
436 struct ctdb_vnn *vnn;
438 /* Verify that we don't have an entry for this ip yet */
439 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
440 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
441 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
442 ctdb_addr_to_str(addr)));
447 /* create a new vnn structure for this ip address */
448 vnn = talloc_zero(ctdb, struct ctdb_vnn);
449 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
450 vnn->iface = talloc_strdup(vnn, iface);
451 CTDB_NO_MEMORY(ctdb, vnn->iface);
452 vnn->public_address = *addr;
453 vnn->public_netmask_bits = mask;
456 DLIST_ADD(ctdb->vnn, vnn);
463 setup the event script directory
465 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
467 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
468 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
473 setup the public address lists from a file
475 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
481 lines = file_lines_load(alist, &nlines, ctdb);
483 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
486 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
490 for (i=0;i<nlines;i++) {
498 while ((*line == ' ') || (*line == '\t')) {
504 if (strcmp(line, "") == 0) {
507 tok = strtok(line, " \t");
509 tok = strtok(NULL, " \t");
511 if (NULL == ctdb->default_public_interface) {
512 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
517 iface = ctdb->default_public_interface;
522 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
523 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
527 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
528 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
541 struct ctdb_public_ip_list {
542 struct ctdb_public_ip_list *next;
548 /* Given a physical node, return the number of
549 public addresses that are currently assigned to this node.
551 static int node_ip_coverage(struct ctdb_context *ctdb,
553 struct ctdb_public_ip_list *ips)
557 for (;ips;ips=ips->next) {
558 if (ips->pnn == pnn) {
566 /* Check if this is a public ip known to the node, i.e. can that
567 node take over this ip ?
569 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
570 struct ctdb_public_ip_list *ip)
572 struct ctdb_all_public_ips *public_ips;
575 public_ips = ctdb->nodes[pnn]->public_ips;
577 if (public_ips == NULL) {
581 for (i=0;i<public_ips->num;i++) {
582 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
583 /* yes, this node can serve this public ip */
592 /* search the node list for a node to take over this ip.
593 pick the node that is currently serving the fewest ips
594 so that the ips get spread out evenly.
596 static int find_takeover_node(struct ctdb_context *ctdb,
597 struct ctdb_node_map *nodemap, uint32_t mask,
598 struct ctdb_public_ip_list *ip,
599 struct ctdb_public_ip_list *all_ips)
605 for (i=0;i<nodemap->num;i++) {
606 if (nodemap->nodes[i].flags & mask) {
607 /* This node is not healthy and can not be used to serve
613 /* verify that this node can serve this ip */
614 if (can_node_serve_ip(ctdb, i, ip)) {
615 /* no it couldn't so skip to the next node */
619 num = node_ip_coverage(ctdb, i, all_ips);
620 /* was this the first node we checked ? */
632 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
633 ctdb_addr_to_str(&ip->addr)));
643 static uint32_t *ip_key(ctdb_sock_addr *ip)
645 static uint32_t key[IP_KEYLEN];
647 bzero(key, sizeof(key));
649 switch (ip->sa.sa_family) {
651 key[3] = htonl(ip->ip.sin_addr.s_addr);
654 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
655 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
656 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
657 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
660 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
667 static void *add_ip_callback(void *parm, void *data)
672 void getips_count_callback(void *param, void *data)
674 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
675 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
677 new_ip->next = *ip_list;
681 struct ctdb_public_ip_list *
682 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
685 struct ctdb_public_ip_list *ip_list;
686 struct ctdb_all_public_ips *public_ips;
687 trbt_tree_t *ip_tree;
689 ip_tree = trbt_create(tmp_ctx, 0);
691 for (i=0;i<ctdb->num_nodes;i++) {
692 public_ips = ctdb->nodes[i]->public_ips;
694 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
698 /* there were no public ips for this node */
699 if (public_ips == NULL) {
703 for (j=0;j<public_ips->num;j++) {
704 struct ctdb_public_ip_list *tmp_ip;
706 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
707 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
708 tmp_ip->pnn = public_ips->ips[j].pnn;
709 tmp_ip->addr = public_ips->ips[j].addr;
712 trbt_insertarray32_callback(ip_tree,
713 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
720 trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
726 make any IP alias changes for public addresses that are necessary
728 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
730 int i, num_healthy, retries;
731 struct ctdb_public_ip ip;
732 struct ctdb_public_ipv4 ipv4;
734 struct ctdb_public_ip_list *all_ips, *tmp_ip;
735 int maxnode, maxnum=0, minnode, minnum=0, num;
737 struct timeval timeout;
738 struct client_async_data *async_data;
739 struct ctdb_client_control_state *state;
740 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
745 /* Count how many completely healthy nodes we have */
747 for (i=0;i<nodemap->num;i++) {
748 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
753 if (num_healthy > 0) {
754 /* We have healthy nodes, so only consider them for
755 serving public addresses
757 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
759 /* We didn't have any completely healthy nodes so
760 use "disabled" nodes as a fallback
762 mask = NODE_FLAGS_INACTIVE;
765 /* since nodes only know about those public addresses that
766 can be served by that particular node, no single node has
767 a full list of all public addresses that exist in the cluster.
768 Walk over all node structures and create a merged list of
769 all public addresses that exist in the cluster.
771 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
773 /* If we want deterministic ip allocations, i.e. that the ip addresses
774 will always be allocated the same way for a specific set of
775 available/unavailable nodes.
777 if (1 == ctdb->tunable.deterministic_public_ips) {
778 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
779 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
780 tmp_ip->pnn = i%nodemap->num;
785 /* mark all public addresses with a masked node as being served by
788 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
789 if (tmp_ip->pnn == -1) {
792 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
797 /* verify that the assigned nodes can serve that public ip
798 and set it to -1 if not
800 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
801 if (tmp_ip->pnn == -1) {
804 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
805 /* this node can not serve this ip. */
811 /* now we must redistribute all public addresses with takeover node
812 -1 among the nodes available
816 /* loop over all ip's and find a physical node to cover for
819 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
820 if (tmp_ip->pnn == -1) {
821 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
822 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
823 ctdb_addr_to_str(&tmp_ip->addr)));
828 /* If we don't want ips to fail back after a node becomes healthy
829 again, we won't even try to reallocate the ip addresses so that
830 they are evenly spread out.
831 This can NOT be used at the same time as DeterministicIPs !
833 if (1 == ctdb->tunable.no_ip_failback) {
834 if (1 == ctdb->tunable.deterministic_public_ips) {
835 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
841 /* now, try to make sure the ip addresses are evenly distributed
843 for each ip address, loop over all nodes that can serve this
844 ip and make sure that the difference between the node
845 serving the most and the node serving the least ips is not greater
848 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
849 if (tmp_ip->pnn == -1) {
853 /* Get the highest and lowest number of ips served by any
854 valid node which can serve this ip.
858 for (i=0;i<nodemap->num;i++) {
859 if (nodemap->nodes[i].flags & mask) {
863 /* only check nodes that can actually serve this ip */
864 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
865 /* no it couldn't so skip to the next node */
869 num = node_ip_coverage(ctdb, i, all_ips);
890 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
891 ctdb_addr_to_str(&tmp_ip->addr)));
896 /* If we want deterministic IPs then don't try to reallocate
897 them to spread out the load.
899 if (1 == ctdb->tunable.deterministic_public_ips) {
903 /* if the spread between the smallest and largest coverage by
904 a node is >=2 we steal one of the ips from the node with
905 most coverage to even things out a bit.
906 try to do this at most 5 times since we don't want to spend
907 too much time balancing the ip coverage.
909 if ( (maxnum > minnum+1)
911 struct ctdb_public_ip_list *tmp;
913 /* mark one of maxnode's vnn's as unassigned and try
916 for (tmp=all_ips;tmp;tmp=tmp->next) {
917 if (tmp->pnn == maxnode) {
927 /* finished distributing the public addresses, now just send the
928 info out to the nodes
932 /* at this point ->pnn is the node which will own each IP
933 or -1 if there is no node that can cover this ip
936 /* now tell all nodes to delete any alias that they should not
937 have. This will be a NOOP on nodes that don't currently
938 hold the given alias */
939 async_data = talloc_zero(tmp_ctx, struct client_async_data);
940 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
942 for (i=0;i<nodemap->num;i++) {
943 /* don't talk to unconnected nodes, but do talk to banned nodes */
944 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
948 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
949 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
950 /* This node should be serving this
951 vnn so don't tell it to release the ip
955 if (tmp_ip->addr.sa.sa_family == AF_INET) {
956 ipv4.pnn = tmp_ip->pnn;
957 ipv4.sin = tmp_ip->addr.ip;
959 timeout = TAKEOVER_TIMEOUT();
960 data.dsize = sizeof(ipv4);
961 data.dptr = (uint8_t *)&ipv4;
962 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
963 0, CTDB_CONTROL_RELEASE_IPv4, 0,
967 ip.pnn = tmp_ip->pnn;
968 ip.addr = tmp_ip->addr;
970 timeout = TAKEOVER_TIMEOUT();
971 data.dsize = sizeof(ip);
972 data.dptr = (uint8_t *)&ip;
973 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
974 0, CTDB_CONTROL_RELEASE_IP, 0,
980 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
981 talloc_free(tmp_ctx);
985 ctdb_client_async_add(async_data, state);
988 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
989 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
990 talloc_free(tmp_ctx);
993 talloc_free(async_data);
996 /* tell all nodes to get their own IPs */
997 async_data = talloc_zero(tmp_ctx, struct client_async_data);
998 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
999 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1000 if (tmp_ip->pnn == -1) {
1001 /* this IP won't be taken over */
1005 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1006 ipv4.pnn = tmp_ip->pnn;
1007 ipv4.sin = tmp_ip->addr.ip;
1009 timeout = TAKEOVER_TIMEOUT();
1010 data.dsize = sizeof(ipv4);
1011 data.dptr = (uint8_t *)&ipv4;
1012 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1013 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1017 ip.pnn = tmp_ip->pnn;
1018 ip.addr = tmp_ip->addr;
1020 timeout = TAKEOVER_TIMEOUT();
1021 data.dsize = sizeof(ip);
1022 data.dptr = (uint8_t *)&ip;
1023 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1024 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1028 if (state == NULL) {
1029 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1030 talloc_free(tmp_ctx);
1034 ctdb_client_async_add(async_data, state);
1036 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1037 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1038 talloc_free(tmp_ctx);
1042 talloc_free(tmp_ctx);
1048 destroy a ctdb_client_ip structure
1050 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1052 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1053 ctdb_addr_to_str(&ip->addr),
1054 ntohs(ip->addr.ip.sin_port),
1057 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1062 called by a client to inform us of a TCP connection that it is managing
1063 that should be tickled with an ACK when IP takeover is done
1064 we handle both the old ipv4 style of packets as well as the new ipv4/6
1067 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1070 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1071 struct ctdb_control_tcp *old_addr = NULL;
1072 struct ctdb_control_tcp_addr new_addr;
1073 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1074 struct ctdb_tcp_list *tcp;
1075 struct ctdb_control_tcp_vnn t;
1078 struct ctdb_client_ip *ip;
1079 struct ctdb_vnn *vnn;
1080 ctdb_sock_addr addr;
1082 switch (indata.dsize) {
1083 case sizeof(struct ctdb_control_tcp):
1084 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1085 ZERO_STRUCT(new_addr);
1086 tcp_sock = &new_addr;
1087 tcp_sock->src.ip = old_addr->src;
1088 tcp_sock->dest.ip = old_addr->dest;
1090 case sizeof(struct ctdb_control_tcp_addr):
1091 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1094 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1095 "to ctdb_control_tcp_client. size was %d but "
1096 "only allowed sizes are %lu and %lu\n",
1098 (long unsigned)sizeof(struct ctdb_control_tcp),
1099 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1103 addr = tcp_sock->src;
1104 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1105 addr = tcp_sock->dest;
1106 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1109 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1110 vnn = find_public_ip_vnn(ctdb, &addr);
1112 switch (addr.sa.sa_family) {
1114 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1115 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1116 ctdb_addr_to_str(&addr)));
1120 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1121 ctdb_addr_to_str(&addr)));
1124 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1130 if (vnn->pnn != ctdb->pnn) {
1131 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1132 ctdb_addr_to_str(&addr),
1133 client_id, client->pid));
1134 /* failing this call will tell smbd to die */
1138 ip = talloc(client, struct ctdb_client_ip);
1139 CTDB_NO_MEMORY(ctdb, ip);
1143 ip->client_id = client_id;
1144 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1145 DLIST_ADD(ctdb->client_ip_list, ip);
1147 tcp = talloc(client, struct ctdb_tcp_list);
1148 CTDB_NO_MEMORY(ctdb, tcp);
1150 tcp->connection.src_addr = tcp_sock->src;
1151 tcp->connection.dst_addr = tcp_sock->dest;
1153 DLIST_ADD(client->tcp_list, tcp);
1155 t.src = tcp_sock->src;
1156 t.dest = tcp_sock->dest;
1158 data.dptr = (uint8_t *)&t;
1159 data.dsize = sizeof(t);
1161 switch (addr.sa.sa_family) {
1163 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1164 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1165 ctdb_addr_to_str(&tcp_sock->src),
1166 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1169 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1170 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1171 ctdb_addr_to_str(&tcp_sock->src),
1172 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1175 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1179 /* tell all nodes about this tcp connection */
1180 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1181 CTDB_CONTROL_TCP_ADD,
1182 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1184 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1192 find a tcp address on a list
1194 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1195 struct ctdb_tcp_connection *tcp)
1199 if (array == NULL) {
1203 for (i=0;i<array->num;i++) {
1204 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1205 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1206 return &array->connections[i];
1213 called by a daemon to inform us of a TCP connection that one of its
1214 clients is managing and that should be tickled with an ACK when IP takeover is
1217 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1219 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1220 struct ctdb_tcp_array *tcparray;
1221 struct ctdb_tcp_connection tcp;
1222 struct ctdb_vnn *vnn;
1224 vnn = find_public_ip_vnn(ctdb, &p->dest);
1226 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1227 ctdb_addr_to_str(&p->dest)));
1233 tcparray = vnn->tcp_array;
1235 /* If this is the first tickle */
1236 if (tcparray == NULL) {
1237 tcparray = talloc_size(ctdb->nodes,
1238 offsetof(struct ctdb_tcp_array, connections) +
1239 sizeof(struct ctdb_tcp_connection) * 1);
1240 CTDB_NO_MEMORY(ctdb, tcparray);
1241 vnn->tcp_array = tcparray;
1244 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1245 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1247 tcparray->connections[tcparray->num].src_addr = p->src;
1248 tcparray->connections[tcparray->num].dst_addr = p->dest;
1254 /* Do we already have this tickle ?*/
1255 tcp.src_addr = p->src;
1256 tcp.dst_addr = p->dest;
1257 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1258 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1259 ctdb_addr_to_str(&tcp.dst_addr),
1260 ntohs(tcp.dst_addr.ip.sin_port),
1265 /* A new tickle, we must add it to the array */
1266 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1267 struct ctdb_tcp_connection,
1269 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1271 vnn->tcp_array = tcparray;
1272 tcparray->connections[tcparray->num].src_addr = p->src;
1273 tcparray->connections[tcparray->num].dst_addr = p->dest;
1276 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1277 ctdb_addr_to_str(&tcp.dst_addr),
1278 ntohs(tcp.dst_addr.ip.sin_port),
1286 remove a TCP connection from the list of connections for its public
1287 address that should be tickled with an ACK when IP takeover is
1290 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1292 struct ctdb_tcp_connection *tcpp;
1293 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1296 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1297 ctdb_addr_to_str(&conn->dst_addr)));
1301 /* if the array is empty we can't remove it
1302 and we don't need to do anything
1304 if (vnn->tcp_array == NULL) {
1305 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1306 ctdb_addr_to_str(&conn->dst_addr),
1307 ntohs(conn->dst_addr.ip.sin_port)));
1312 /* See if we know this connection
1313 if we don't know this connection then we don't need to do anything
1315 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1317 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1318 ctdb_addr_to_str(&conn->dst_addr),
1319 ntohs(conn->dst_addr.ip.sin_port)));
1324 /* We need to remove this entry from the array.
1325 Instead of allocating a new array and copying data to it
1326 we cheat and just copy the last entry in the existing array
1327 to the entry that is to be removed and just shrink the
1330 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1331 vnn->tcp_array->num--;
1333 /* If we deleted the last entry we also need to remove the entire array
1335 if (vnn->tcp_array->num == 0) {
1336 talloc_free(vnn->tcp_array);
1337 vnn->tcp_array = NULL;
1340 vnn->tcp_update_needed = true;
1342 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1343 ctdb_addr_to_str(&conn->src_addr),
1344 ntohs(conn->src_addr.ip.sin_port)));
1349 called when a daemon restarts - send all tickles for all public addresses
1350 we are serving immediately to the new node.
1352 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1354 /*XXX here we should send all tickles we are serving to the new node */
1360 called when a client structure goes away - hook to remove
1361 elements from the tcp_list in all daemons
1363 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1365 while (client->tcp_list) {
1366 struct ctdb_tcp_list *tcp = client->tcp_list;
1367 DLIST_REMOVE(client->tcp_list, tcp);
1368 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1374 release all IPs on shutdown
1376 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1378 struct ctdb_vnn *vnn;
1380 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1381 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1384 if (vnn->pnn == ctdb->pnn) {
1387 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1389 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1390 vnn->public_netmask_bits);
1391 release_kill_clients(ctdb, &vnn->public_address);
1397 get list of public IPs
/*
  Builds a struct ctdb_all_public_ips reply in outdata.

  A first pass over ctdb->vnn counts the entries.  The reply length is
  the offset of the ips[] member plus num entries of
  struct ctdb_public_ip; the buffer is zero-allocated as a talloc child
  of outdata and checked with CTDB_NO_MEMORY.  outdata->dsize and
  outdata->dptr are pointed at it, and a second pass copies each vnn's
  pnn and public_address into ips->ips[i].
 */
1399 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1400 struct ctdb_req_control *c, TDB_DATA *outdata)
1403 struct ctdb_all_public_ips *ips;
1404 struct ctdb_vnn *vnn;
1406 /* count how many public ip structures we have */
1408 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1412 len = offsetof(struct ctdb_all_public_ips, ips) +
1413 num*sizeof(struct ctdb_public_ip);
1414 ips = talloc_zero_size(outdata, len);
1415 CTDB_NO_MEMORY(ctdb, ips);
1417 outdata->dsize = len;
1418 outdata->dptr = (uint8_t *)ips;
1422 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1423 ips->ips[i].pnn = vnn->pnn;
1424 ips->ips[i].addr = vnn->public_address;
1433 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Builds a struct ctdb_all_public_ipsv4 reply in outdata.

  Both passes over ctdb->vnn test the address family against AF_INET,
  so the counting pass and the filling pass treat non-IPv4 entries the
  same way.  The reply length is the offset of the ips[] member plus
  num entries of struct ctdb_public_ipv4; the buffer is zero-allocated
  as a talloc child of outdata and checked with CTDB_NO_MEMORY.  Each
  stored entry gets the vnn's pnn and the public_address.ip member
  copied into its sin field.
 */
1435 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1436 struct ctdb_req_control *c, TDB_DATA *outdata)
1439 struct ctdb_all_public_ipsv4 *ips;
1440 struct ctdb_vnn *vnn;
1442 /* count how many public ip structures we have */
1444 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1445 if (vnn->public_address.sa.sa_family != AF_INET) {
1451 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1452 num*sizeof(struct ctdb_public_ipv4);
1453 ips = talloc_zero_size(outdata, len);
1454 CTDB_NO_MEMORY(ctdb, ips);
1456 outdata->dsize = len;
1457 outdata->dptr = (uint8_t *)ips;
1461 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1462 if (vnn->public_address.sa.sa_family != AF_INET) {
1465 ips->ips[i].pnn = vnn->pnn;
1466 ips->ips[i].sin = vnn->public_address.ip;
1475 structure containing the listening socket and the list of tcp connections
1476 that the ctdb daemon is to kill
/*
  Kill state attached to a vnn: it is reachable as vnn->killtcp and is
  allocated by ctdb_killtcp_add_connection() when the first connection
  to kill is added.
 */
1478 struct ctdb_kill_tcp {
/* vnn this state belongs to; ctdb_killtcp_destructor() clears vnn->killtcp */
1479 struct ctdb_vnn *vnn;
/* daemon context; ctdb->ev is used to schedule the periodic tickle timer */
1480 struct ctdb_context *ctdb;
/* read event on the capture socket, dispatched to capture_tcp_handler() */
1482 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con entries, keyed via killtcp_key() */
1483 trbt_tree_t *connections;
1488 a tcp connection that is to be killed
/* One entry stored in the connections tree of struct ctdb_kill_tcp. */
1490 struct ctdb_killtcp_con {
/* the two endpoints, stored by ctdb_killtcp_add_connection() after
   being passed through ctdb_canonicalize_ip() */
1491 ctdb_sock_addr src_addr;
1492 ctdb_sock_addr dst_addr;
/* owning kill state; it is also the talloc parent of this entry */
1494 struct ctdb_kill_tcp *killtcp;
1497 /* this function is used to create a key to represent this socketpair
1498 in the killtcp tree.
1499 this key is used to insert and lookup matching socketpairs that are
1500 to be tickled and RST
/*
  KILLTCP_KEYLEN is the key length in 32-bit words.  The branch reading
  the ip6 member fills all ten slots; the branch reading the ip member
  fills only the first four and leaves the rest zero from the bzero().

  killtcp_key() writes the key into a function-local static array that
  is cleared and refilled on every call, so the contents of an earlier
  key are lost as soon as the function is called again.  Slots
  alternate dst and src values, with the two ports in the last pair of
  used slots.

  A family mismatch between src and dst is logged at DEBUG_ERR, as is a
  family handled by neither branch.
 */
1502 #define KILLTCP_KEYLEN 10
1503 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1505 static uint32_t key[KILLTCP_KEYLEN];
1507 bzero(key, sizeof(key));
1509 if (src->sa.sa_family != dst->sa.sa_family) {
1510 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1514 switch (src->sa.sa_family) {
1516 key[0] = dst->ip.sin_addr.s_addr;
1517 key[1] = src->ip.sin_addr.s_addr;
1518 key[2] = dst->ip.sin_port;
1519 key[3] = src->ip.sin_port;
1522 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1523 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1524 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1525 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1526 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1527 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1528 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1529 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1530 key[8] = dst->ip6.sin6_port;
1531 key[9] = src->ip6.sin6_port;
1534 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1542 called when we get a read event on the raw socket
/*
  fd event handler for the capture socket; private_data is the
  struct ctdb_kill_tcp that registered it.

  The flags are tested for EVENT_FD_READ first.  A packet is then read
  with ctdb_sys_read_tcp_packet(); a non-zero result means the packet
  is not one we care about.  Otherwise the connection is looked up in
  killtcp->connections with killtcp_key(&src, &dst), and when an entry
  is found a tcp reset is sent through ctdb_sys_send_tcp() from
  con->dst_addr to con->src_addr using the captured ack_seq and seq.

  NOTE(review): the DEBUG format begins with ":%d" and is only given
  the dst port, so the dst address never appears in the log line -
  confirm whether that is intended.
 */
1544 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1545 uint16_t flags, void *private_data)
1547 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1548 struct ctdb_killtcp_con *con;
1549 ctdb_sock_addr src, dst;
1550 uint32_t ack_seq, seq;
1552 if (!(flags & EVENT_FD_READ)) {
1556 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1557 killtcp->private_data,
1559 &ack_seq, &seq) != 0) {
1560 /* probably a non-tcp ACK packet */
1564 /* check if we have this guy in our list of connections
1567 con = trbt_lookuparray32(killtcp->connections,
1568 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1570 /* no this was some other packet we can just ignore */
1574 /* This one has been tickled !
1575 now reset him and remove him from the list.
1577 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1578 ntohs(con->dst_addr.ip.sin_port),
1579 ctdb_addr_to_str(&con->src_addr),
1580 ntohs(con->src_addr.ip.sin_port)));
1582 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1587 /* when traversing the list of all tcp connections to send tickle acks to
1588 (so that we can capture the ack coming back and kill the connection
1590 this callback is called for each connection we are currently trying to kill
/*
  Tree traversal callback; data is one struct ctdb_killtcp_con and
  param is not referenced in the visible code.

  An entry whose count has reached 5 is given up on.  Any other entry
  has its dst_addr / src_addr pair passed on for another tickle.
 */
1592 static void tickle_connection_traverse(void *param, void *data)
1594 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1596 /* have tried too many times, just give up */
1597 if (con->count >= 5) {
1602 /* otherwise, try tickling it again */
1605 (ctdb_sock_addr *)&con->dst_addr,
1606 (ctdb_sock_addr *)&con->src_addr,
1612 called every second until all sentenced connections have been reset
/*
  Timed event handler; private_data is the struct ctdb_kill_tcp.

  Runs tickle_connection_traverse() over every entry of
  killtcp->connections.  If the tree pointer or its root is NULL the
  whole killtcp structure is released with talloc_free().  The handler
  re-arms itself on killtcp->ctdb->ev one second ahead, with killtcp as
  the talloc parent of the timer.
 */
1614 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1615 struct timeval t, void *private_data)
1617 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1620 /* loop over all connections sending tickle ACKs */
1621 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1624 /* If there are no more connections to kill we can remove the
1625 entire killtcp structure
1627 if ( (killtcp->connections == NULL) ||
1628 (killtcp->connections->root == NULL) ) {
1629 talloc_free(killtcp);
1633 /* try tickling them again in a seconds time
1635 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1636 ctdb_tickle_sentenced_connections, killtcp);
1640 destroy the killtcp structure
/*
  talloc destructor registered by ctdb_killtcp_add_connection().
  Clears the owning vnn's killtcp pointer so it does not keep pointing
  at the structure being destroyed.
 */
1642 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1644 killtcp->vnn->killtcp = NULL;
1649 /* nothing fancy here, just unconditionally replace any existing
1650 connection structure with the new one.
1652 dont even free the old one if it did exist, that one is talloc_stolen
1653 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which supplies the new
  struct ctdb_killtcp_con as its callback argument.
  NOTE(review): presumably parm is the new entry and data the existing
  one - confirm against the rb_tree callback contract.
 */
1656 static void *add_killtcp_callback(void *parm, void *data)
1662 add a tcp socket to the list of connections we want to RST
/*
  Registers one connection to be reset.

  Both endpoints are canonicalized with ctdb_canonicalize_ip().  The
  owning vnn is looked up by dst, then by src, and finally compared
  against ctdb->single_ip_vnn; if none match, "not a public address" is
  logged.

  The first connection for a vnn allocates the struct ctdb_kill_tcp on
  ctdb, creates its connection tree, stores it in vnn->killtcp and
  installs ctdb_killtcp_destructor().  The new struct ctdb_killtcp_con
  is a talloc child of killtcp and is inserted into the tree through
  add_killtcp_callback().

  The capture socket is opened on vnn->iface when capture_fd is still
  -1.  When no fd event exists yet, one is created with
  EVENT_FD_READ | EVENT_FD_AUTOCLOSE, together with a one-second timer
  running ctdb_tickle_sentenced_connections().  The tail of the
  function frees vnn->killtcp and clears the pointer (presumably the
  failure path - confirm).

  NOTE(review): the tree key is built here as killtcp_key(dst, src)
  while capture_tcp_handler() looks up killtcp_key(src, dst) of the
  captured packet; presumably captured packets travel in the reverse
  direction - confirm.
 */
1664 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1668 ctdb_sock_addr src, dst;
1669 struct ctdb_kill_tcp *killtcp;
1670 struct ctdb_killtcp_con *con;
1671 struct ctdb_vnn *vnn;
1673 ctdb_canonicalize_ip(s, &src);
1674 ctdb_canonicalize_ip(d, &dst);
1676 vnn = find_public_ip_vnn(ctdb, &dst);
1678 vnn = find_public_ip_vnn(ctdb, &src);
1681 /* if it is not a public ip it could be our 'single ip' */
1682 if (ctdb->single_ip_vnn) {
1683 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1684 vnn = ctdb->single_ip_vnn;
1689 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1693 killtcp = vnn->killtcp;
1695 /* If this is the first connection to kill we must allocate
1698 if (killtcp == NULL) {
1699 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1700 CTDB_NO_MEMORY(ctdb, killtcp);
1703 killtcp->ctdb = ctdb;
1704 killtcp->capture_fd = -1;
1705 killtcp->connections = trbt_create(killtcp, 0);
1707 vnn->killtcp = killtcp;
1708 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1713 /* create a structure that describes this connection we want to
1714 RST and store it in killtcp->connections
1716 con = talloc(killtcp, struct ctdb_killtcp_con);
1717 CTDB_NO_MEMORY(ctdb, con);
1718 con->src_addr = src;
1719 con->dst_addr = dst;
1721 con->killtcp = killtcp;
1724 trbt_insertarray32_callback(killtcp->connections,
1725 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1726 add_killtcp_callback, con);
1729 If we dont have a socket to listen on yet we must create it
1731 if (killtcp->capture_fd == -1) {
1732 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1733 if (killtcp->capture_fd == -1) {
1734 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1740 if (killtcp->fde == NULL) {
1741 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1742 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1743 capture_tcp_handler, killtcp);
1745 /* We also need to set up some events to tickle all these connections
1746 until they are all reset
1748 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1749 ctdb_tickle_sentenced_connections, killtcp);
1752 /* tickle him once now */
1761 talloc_free(vnn->killtcp);
1762 vnn->killtcp = NULL;
1767 kill a TCP connection.
/*
  Control entry point: interprets indata.dptr as a
  struct ctdb_control_killtcp and forwards its src_addr and dst_addr
  to ctdb_killtcp_add_connection(), returning that function's result.

  NOTE(review): no check of indata.dsize is visible before the cast -
  confirm the size is validated by the caller.
 */
1769 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1771 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1773 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1777 called by a daemon to inform us of the entire list of TCP tickles for
1778 a particular public address.
1779 this control should only be sent by the node that is currently serving
1780 that public address.
/*
  Replaces the stored tickle list of one public address with the list
  carried in indata (a struct ctdb_control_tcp_tickle_list).

  Validation is two-step: indata must first be large enough to reach
  tickles.connections, and then large enough to hold tickles.num
  connection entries.  The address is resolved with
  find_public_ip_vnn(); an unknown address is logged at DEBUG_INFO.

  The previous vnn->tcp_array is freed, a new struct ctdb_tcp_array is
  allocated (initially on ctdb->nodes), the connections are copied out
  of indata, and the array is finally moved onto the vnn with
  talloc_steal().

  NOTE(review): tickles.num comes straight from indata and is
  multiplied by sizeof(struct ctdb_tcp_connection) in the size check;
  confirm the product cannot wrap for large values.
 */
1782 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1784 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1785 struct ctdb_tcp_array *tcparray;
1786 struct ctdb_vnn *vnn;
1788 /* We must at least have tickles.num or else we cant verify the size
1789 of the received data blob
1791 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1792 tickles.connections)) {
1793 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1797 /* verify that the size of data matches what we expect */
1798 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1799 tickles.connections)
1800 + sizeof(struct ctdb_tcp_connection)
1801 * list->tickles.num) {
1802 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1806 vnn = find_public_ip_vnn(ctdb, &list->addr);
1808 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1809 ctdb_addr_to_str(&list->addr)));
1814 /* remove any old ticklelist we might have */
1815 talloc_free(vnn->tcp_array);
1816 vnn->tcp_array = NULL;
1818 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1819 CTDB_NO_MEMORY(ctdb, tcparray);
1821 tcparray->num = list->tickles.num;
1823 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1824 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1826 memcpy(tcparray->connections, &list->tickles.connections[0],
1827 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1829 /* We now have a new fresh tickle list array for this vnn */
1830 vnn->tcp_array = talloc_steal(vnn, tcparray);
1836 called to return the full list of tickles for the public address associated
1837 with the provided vnn
/*
  Returns the tickle list of one public address.

  indata.dptr is read as a ctdb_sock_addr and resolved with
  find_public_ip_vnn(); an unknown address is logged at DEBUG_ERR.
  The reply is a struct ctdb_control_tcp_tickle_list sized for num
  connections, allocated as a talloc child of outdata; tickles.num is
  set and the connections are copied from vnn->tcp_array.

  NOTE(review): no check of indata.dsize is visible before indata.dptr
  is used as an address - confirm it is validated by the caller.
 */
1839 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1841 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1842 struct ctdb_control_tcp_tickle_list *list;
1843 struct ctdb_tcp_array *tcparray;
1845 struct ctdb_vnn *vnn;
1847 vnn = find_public_ip_vnn(ctdb, addr);
1849 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1850 ctdb_addr_to_str(addr)));
1855 tcparray = vnn->tcp_array;
1857 num = tcparray->num;
1862 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1863 tickles.connections)
1864 + sizeof(struct ctdb_tcp_connection) * num;
1866 outdata->dptr = talloc_size(outdata, outdata->dsize);
1867 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1868 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1871 list->tickles.num = num;
1873 memcpy(&list->tickles.connections[0], tcparray->connections,
1874 sizeof(struct ctdb_tcp_connection) * num);
1882 set the list of all tcp tickles for a public address
/*
  Marshals tcparray into a struct ctdb_control_tcp_tickle_list and
  sends it as CTDB_CONTROL_SET_TCP_TICKLE_LIST.

  The blob is allocated on ctdb with room for num connections, filled
  with tickles.num and the connection entries, passed to
  ctdb_daemon_send_control() with CTDB_CTRL_FLAG_NOREPLY, and freed
  again after the send.  A failed send is logged at DEBUG_ERR.

  NOTE(review): the visible send call targets CTDB_BROADCAST_CONNECTED
  directly; neither the timeout nor the destnode parameter appears in
  it - confirm whether they are meant to be used.
 */
1884 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1885 struct timeval timeout, uint32_t destnode,
1886 ctdb_sock_addr *addr,
1887 struct ctdb_tcp_array *tcparray)
1891 struct ctdb_control_tcp_tickle_list *list;
1894 num = tcparray->num;
1899 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1900 tickles.connections) +
1901 sizeof(struct ctdb_tcp_connection) * num;
1902 data.dptr = talloc_size(ctdb, data.dsize);
1903 CTDB_NO_MEMORY(ctdb, data.dptr);
1905 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1907 list->tickles.num = num;
1909 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1912 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1913 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1914 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1916 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1920 talloc_free(data.dptr);
1927 perform tickle updates if required
/*
  Periodic timed event handler; private_data is the ctdb context.

  For each entry of ctdb->vnn it compares vnn->pnn with ctdb->pnn and
  checks vnn->tcp_update_needed, then pushes the list for
  vnn->public_address with ctdb_ctrl_set_tcp_tickles() using
  CTDB_BROADCAST_CONNECTED as destination.  A failed push is logged at
  DEBUG_ERR.

  The handler re-schedules itself on ctdb->tickle_update_context after
  tunable.tickle_update_interval seconds.
 */
1929 static void ctdb_update_tcp_tickles(struct event_context *ev,
1930 struct timed_event *te,
1931 struct timeval t, void *private_data)
1933 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1935 struct ctdb_vnn *vnn;
1937 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1938 /* we only send out updates for public addresses that
1941 if (ctdb->pnn != vnn->pnn) {
1944 /* We only send out the updates if we need to */
1945 if (!vnn->tcp_update_needed) {
1948 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1950 CTDB_BROADCAST_CONNECTED,
1951 &vnn->public_address,
1954 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1955 ctdb_addr_to_str(&vnn->public_address)));
1959 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1960 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1961 ctdb_update_tcp_tickles, ctdb);
1966 start periodic update of tcp tickles
/*
  Creates ctdb->tickle_update_context as a new talloc context under
  ctdb and schedules the first run of ctdb_update_tcp_tickles() on it,
  tunable.tickle_update_interval seconds from now.  The timer is a
  child of that context.
 */
1968 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1970 ctdb->tickle_update_context = talloc_new(ctdb);
1972 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1973 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1974 ctdb_update_tcp_tickles, ctdb);
/*
  State carried between runs of send_gratious_arp(); allocated by
  ctdb_control_send_gratious_arp().
 */
1980 struct control_gratious_arp {
/* daemon context; ctdb->ev is used to schedule the next send */
1981 struct ctdb_context *ctdb;
/* address passed to ctdb_sys_send_arp() */
1982 ctdb_sock_addr addr;
1988 send a control_gratuitous arp
/*
  Timed event handler; private_data is a struct control_gratious_arp.

  Sends one arp for arp->addr on arp->iface via ctdb_sys_send_arp() and
  logs strerror(errno) when that fails.  arp->count is compared with
  CTDB_ARP_REPEAT, and the handler re-arms itself on arp->ctdb->ev
  after CTDB_ARP_INTERVAL seconds, with arp as the talloc parent of the
  timer.
 */
1990 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1991 struct timeval t, void *private_data)
1994 struct control_gratious_arp *arp = talloc_get_type(private_data,
1995 struct control_gratious_arp);
1997 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1999 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2004 if (arp->count == CTDB_ARP_REPEAT) {
2009 event_add_timed(arp->ctdb->ev, arp,
2010 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2011 send_gratious_arp, arp);
/*
  Control entry point that starts a series of gratuitous arps.

  indata is read as a struct ctdb_control_gratious_arp.  Its size is
  checked first against the offset of the iface member and then
  against that offset plus gratious_arp->len; both failures are logged
  at DEBUG_ERR.

  A struct control_gratious_arp is allocated on ctdb, the address is
  copied, the interface name is duplicated as a talloc child of it,
  and send_gratious_arp() is scheduled immediately (timeval_zero()).

  NOTE(review): no free of arp is visible on the path where the
  talloc_strdup() of iface fails - confirm what CTDB_NO_MEMORY does
  there.
 */
2018 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2020 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2021 struct control_gratious_arp *arp;
2023 /* verify the size of indata */
2024 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2025 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2026 (unsigned)indata.dsize,
2027 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2031 ( offsetof(struct ctdb_control_gratious_arp, iface)
2032 + gratious_arp->len ) ){
2034 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2035 "but should be %u bytes\n",
2036 (unsigned)indata.dsize,
2037 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2042 arp = talloc(ctdb, struct control_gratious_arp);
2043 CTDB_NO_MEMORY(ctdb, arp);
2046 arp->addr = gratious_arp->addr;
2047 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2048 CTDB_NO_MEMORY(ctdb, arp->iface);
2051 event_add_timed(arp->ctdb->ev, arp,
2052 timeval_zero(), send_gratious_arp, arp);
/*
  Control entry point that adds a public address.

  indata is read as a struct ctdb_control_ip_iface.  Its size is
  checked against the offset of the iface member and then against a
  length that includes pub->len; both failures are logged at DEBUG_ERR.
  The address, mask and interface name are then passed to
  ctdb_add_public_address(), and a failure there is logged as well.
 */
2057 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2059 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2062 /* verify the size of indata */
2063 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2064 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2068 ( offsetof(struct ctdb_control_ip_iface, iface)
2071 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2072 "but should be %u bytes\n",
2073 (unsigned)indata.dsize,
2074 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2078 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2081 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2089 called when releaseip event finishes for del_public_address
/*
  Event script completion callback.  Frees private_data, which
  ctdb_control_del_public_address() sets to the temporary talloc
  context it created for the release-ip event.  status is not examined
  in the visible code.
 */
2091 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2094 talloc_free(private_data);
2097 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2099 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2100 struct ctdb_vnn *vnn;
2103 /* verify the size of indata */
2104 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2105 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2109 ( offsetof(struct ctdb_control_ip_iface, iface)
2112 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2113 "but should be %u bytes\n",
2114 (unsigned)indata.dsize,
2115 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2119 /* walk over all public addresses until we find a match */
2120 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2121 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2122 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2124 DLIST_REMOVE(ctdb->vnn, vnn);
2126 ret = ctdb_event_script_callback(ctdb,
2127 mem_ctx, delete_ip_callback, mem_ctx,
2129 CTDB_EVENT_RELEASE_IP,
2132 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2133 vnn->public_netmask_bits);