4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout passed to the async release/takeover controls sent from
   ctdb_takeover_run(): timeval_current_ofs() applied to the
   takeover_timeout tunable.  The expansion refers to a variable named
   "ctdb", so one must be in scope wherever the macro is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* Offset handed to timeval_current_ofs() when ctdb_control_send_arp()
   re-arms its own timed event. */
33 #define CTDB_ARP_INTERVAL 1
/* Value that arp->count is compared against in ctdb_control_send_arp(). */
34 #define CTDB_ARP_REPEAT 3
/* Private data for the ctdb_control_send_arp() timed event; allocated
   by takeover_ip_callback() on the vnn's takeover_ctx. */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
/* tcp connections to send tickle ACKs for; takeover_ip_callback()
   moves the vnn's tcp_array here with talloc_steal() */
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked list node (prev/next) holding one tcp connection.
   ctdb_control_tcp_client() adds nodes to client->tcp_list and
   ctdb_takeover_client_destructor_hook() removes them. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Doubly linked list node kept on ctdb->client_ip_list.
   ctdb_control_tcp_client() adds one per registered connection and
   ctdb_client_ip_destructor() unlinks it again; release_kill_clients()
   walks the list. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
/* back pointer used by the destructor to reach client_ip_list */
58 struct ctdb_context *ctdb;
/* Timed-event handler scheduled by takeover_ip_callback().
   private_data is a struct ctdb_takeover_arp.  Each run calls
   ctdb_sys_send_arp() for arp->addr on arp->vnn->iface, then walks
   arp->tcparray and calls ctdb_sys_send_tcp() for every recorded
   connection (the "tickle ack").  Failures of either call are only
   logged.  arp->count is compared against CTDB_ARP_REPEAT, and the
   handler re-arms itself CTDB_ARP_INTERVAL later on the vnn's
   takeover_ctx. */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
/* NOTE(review): takeover_ip_callback() can hand over a NULL tcp_array;
   confirm tcparray is checked for NULL before this loop. */
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
/* re-arm on takeover_ctx, so freeing that context cancels the event */
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
110 ctdb_control_send_arp, arp);
/* State carried from ctdb_control_takeover_ip() and
   ctdb_control_release_ip() to their event-script completion
   callbacks. */
113 struct takeover_callback_state {
/* the control request that is answered once the script has finished */
114 struct ctdb_req_control *c;
/* talloc'ed copy of the public address being taken over or released */
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/* Completion callback for the event script started by
   ctdb_control_takeover_ip().  private_data is the
   struct takeover_callback_state created there.

   When the script failed, the failure is logged and the pending
   control is answered with the script status.  Otherwise a
   struct ctdb_takeover_arp is allocated on the vnn's takeover_ctx
   (created on demand), the vnn's tcp_array is moved into it,
   ctdb_control_send_arp() is scheduled to run immediately and the
   control is answered with 0.  The "failed" path answers with -1. */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(ctdb);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
/* the array now belongs to arp, so the vnn starts over with an empty
   list and is flagged as needing a tickle-list update */
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn entry that holds a given public ip address.
179 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each entry's public_address
   with addr using ctdb_same_ip().  The result is a struct ctdb_vnn
   pointer. */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
/* Control handler: take over the public address in indata, which is
   interpreted as a struct ctdb_public_ip.

   Looks up the vnn for the address (logging an error when it is not a
   public address), does nothing further when ctdb_sys_have_ip()
   reports the address is already held, and otherwise starts an event
   script through ctdb_event_script_callback() with
   takeover_ip_callback() as completion handler and
   ctdb->tunable.script_timeout as timeout.  The reply to the control
   is sent from the callback. */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(ctdb, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
/* NOTE(review): c and addr are parented to ctdb here, whereas
   ctdb_control_release_ip() parents them to state.  With this
   parenting, freeing state does not free them - confirm whether that
   is intended. */
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239 state, takeover_ip_callback, state,
242 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243 vnn->public_netmask_bits);
246 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247 ctdb_addr_to_str(&pip->addr),
253 /* tell ctdb_control.c that we will be replying asynchronously */
260 takeover an ip address old v4 style
/* Old ipv4-only variant of the takeover control.  Allocates a zeroed
   struct ctdb_public_ip on c, copies the incoming payload into it and
   forwards to ctdb_control_takeover_ip(), returning its result. */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
263 struct ctdb_req_control *c,
269 data.dsize = sizeof(struct ctdb_public_ip);
270 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is the caller-supplied indata.dsize,
   while the destination holds sizeof(struct ctdb_public_ip) bytes.
   No bound check is visible here - confirm the control dispatcher
   validates the size before this point.
   NOTE(review): presumably relies on the old v4 structure being a
   layout-compatible prefix of struct ctdb_public_ip - verify. */
273 memcpy(data.dptr, indata.dptr, indata.dsize);
274 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
278 kill any clients that are registered with an IP that is being released
/* Walks ctdb->client_ip_list and, for every entry whose address
   matches addr (ctdb_same_ip), looks up the owning client with
   ctdb_reqid_find() and sends SIGKILL to client->pid when that pid is
   non-zero. */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 struct ctdb_client_ip *ip;
284 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285 ctdb_addr_to_str(addr)));
287 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288 ctdb_sock_addr tmp_addr;
291 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
293 ctdb_addr_to_str(&ip->addr)));
295 if (ctdb_same_ip(&tmp_addr, addr)) {
/* NOTE(review): the lookup result is dereferenced below without a
   visible NULL check - confirm ctdb_reqid_find() cannot fail here. */
296 struct ctdb_client *client = ctdb_reqid_find(ctdb,
299 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
301 ctdb_addr_to_str(&ip->addr),
304 if (client->pid != 0) {
305 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306 (unsigned)client->pid,
307 ctdb_addr_to_str(addr),
309 kill(client->pid, SIGKILL);
316 called when releaseip event finishes
/* Completion callback for the "releaseip" event script started by
   ctdb_control_release_ip().  private_data is the
   struct takeover_callback_state created there.

   Sends the released address, as a NUL-terminated string, to this
   node (ctdb->pnn) on CTDB_SRVID_RELEASE_IP, kills the clients
   registered for the address and answers the pending control with 0.
   The script status argument is not examined in the lines shown. */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status,
321 struct takeover_callback_state *state =
322 talloc_get_type(private_data, struct takeover_callback_state);
325 /* send a message to all clients of this node telling them
326 that the cluster has been reconfigured and they should
327 release any sockets on this IP */
328 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
/* +1 so the terminating NUL travels with the message */
330 data.dsize = strlen((char *)data.dptr)+1;
332 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
334 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
336 /* kill clients that have registered with this IP */
337 release_kill_clients(ctdb, state->addr);
339 /* the control succeeded */
340 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
345 release an ip address
/* Control handler: release the public address in indata, which is
   interpreted as a struct ctdb_public_ip.

   Looks up the vnn, frees vnn->takeover_ctx (which cancels any ARP
   events scheduled on it), logs a "redundant release" when
   ctdb_sys_have_ip() says the address is not held, and otherwise runs
   the "releaseip" event script with release_ip_callback() as
   completion handler.  The reply to the control is sent from the
   callback. */
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
348 struct ctdb_req_control *c,
353 struct takeover_callback_state *state;
354 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355 struct ctdb_vnn *vnn;
357 /* update our vnn list */
358 vnn = find_public_ip_vnn(ctdb, &pip->addr);
/* NOTE(review): this message says "takeoverip" although this is the
   release path - looks copied from ctdb_control_takeover_ip(). */
360 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
361 ctdb_addr_to_str(&pip->addr)));
366 /* stop any previous arps */
367 talloc_free(vnn->takeover_ctx);
368 vnn->takeover_ctx = NULL;
370 if (!ctdb_sys_have_ip(&pip->addr)) {
371 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
372 ctdb_addr_to_str(&pip->addr),
373 vnn->public_netmask_bits,
378 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n",
379 ctdb_addr_to_str(&pip->addr),
380 vnn->public_netmask_bits,
383 state = talloc(ctdb, struct takeover_callback_state);
384 CTDB_NO_MEMORY(ctdb, state);
/* the request and the address copy are children of state, so they
   are released together with it */
386 state->c = talloc_steal(state, c);
387 state->addr = talloc(state, ctdb_sock_addr);
388 CTDB_NO_MEMORY(ctdb, state->addr);
389 *state->addr = pip->addr;
392 ret = ctdb_event_script_callback(ctdb,
393 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
394 state, release_ip_callback, state,
395 "releaseip %s %s %u",
397 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
398 vnn->public_netmask_bits);
400 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
401 ctdb_addr_to_str(&pip->addr),
407 /* tell the control that we will be reply asynchronously */
413 release an ip address old v4 style
/* Old ipv4-only variant of the release control.  Allocates a zeroed
   struct ctdb_public_ip on c, copies the incoming payload into it and
   forwards to ctdb_control_release_ip(), returning its result. */
415 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
416 struct ctdb_req_control *c,
422 data.dsize = sizeof(struct ctdb_public_ip);
423 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
424 CTDB_NO_MEMORY(ctdb, data.dptr);
/* NOTE(review): the copy length is the caller-supplied indata.dsize,
   while the destination holds sizeof(struct ctdb_public_ip) bytes.
   No bound check is visible here - confirm the control dispatcher
   validates the size before this point. */
426 memcpy(data.dptr, indata.dptr, indata.dsize);
427 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/* Adds one public address to ctdb->vnn.
   addr  - the address to add
   mask  - netmask width in bits, stored in public_netmask_bits
   iface - interface name, duplicated onto the new vnn

   An address already on the list (ctdb_same_sockaddr) is reported as
   a duplicate at DEBUG_CRIT.  Called from ctdb_set_public_addresses(),
   which treats a non-zero result as failure. */
431 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
433 struct ctdb_vnn *vnn;
435 /* Verify that we dont have an entry for this ip yet */
436 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
437 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
438 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
439 ctdb_addr_to_str(addr)));
444 /* create a new vnn structure for this ip address */
445 vnn = talloc_zero(ctdb, struct ctdb_vnn);
446 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
/* iface is a child of vnn, so it is released together with it */
447 vnn->iface = talloc_strdup(vnn, iface);
448 CTDB_NO_MEMORY(ctdb, vnn->iface);
449 vnn->public_address = *addr;
450 vnn->public_netmask_bits = mask;
453 DLIST_ADD(ctdb->vnn, vnn);
460 setup the event script directory
/* Stores a talloc'ed copy of script_dir (owned by ctdb) in
   ctdb->event_script_dir.  An allocation failure is handled by the
   CTDB_NO_MEMORY macro. */
462 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
464 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
465 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
470 setup the public address lists from a file
/* Loads the public address list from the file named by alist.

   The file is read with file_lines_load(); trailing empty lines are
   dropped from the count.  For each remaining line, leading spaces
   and tabs are skipped and empty lines are ignored.  The line is then
   split on space/tab with strtok(): the first token is the address
   string, the second the interface.  Without an interface token,
   ctdb->default_public_interface is used, and it is an error if that
   is NULL as well.  The address is parsed with parse_ip_mask() and
   registered with ctdb_add_public_address().

   Note: strtok() modifies the line in place and keeps static state. */
472 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
478 lines = file_lines_load(alist, &nlines, ctdb);
480 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
483 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
487 for (i=0;i<nlines;i++) {
495 while ((*line == ' ') || (*line == '\t')) {
501 if (strcmp(line, "") == 0) {
504 tok = strtok(line, " \t");
506 tok = strtok(NULL, " \t");
508 if (NULL == ctdb->default_public_interface) {
509 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
514 iface = ctdb->default_public_interface;
519 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
/* line numbers in messages are 1-based, hence i+1 */
520 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
524 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
525 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked list node used for the cluster-wide merged list of
   public addresses built by create_merged_ip_list().  The allocation
   code in this file also reads and writes ->pnn and ->addr on these
   nodes. */
538 struct ctdb_public_ip_list {
539 struct ctdb_public_ip_list *next;
545 /* Given a physical node, return the number of
546 public addresses that are currently assigned to this node.
/* Walks the ips list and tests each entry's ->pnn against pnn.
   find_takeover_node() and ctdb_takeover_run() use the result as the
   number of addresses currently assigned to that node. */
548 static int node_ip_coverage(struct ctdb_context *ctdb,
550 struct ctdb_public_ip_list *ips)
554 for (;ips;ips=ips->next) {
555 if (ips->pnn == pnn) {
563 /* Check if this is a public ip known to the node, i.e. can that
564 node takeover this ip ?
/* Checks whether ip->addr appears in the public address list recorded
   for node pnn (ctdb->nodes[pnn]->public_ips), comparing with
   ctdb_same_ip().  A node without a public_ips list is handled
   separately.  Callers in this file treat a non-zero result as "this
   node can NOT serve the ip". */
566 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
567 struct ctdb_public_ip_list *ip)
569 struct ctdb_all_public_ips *public_ips;
572 public_ips = ctdb->nodes[pnn]->public_ips;
574 if (public_ips == NULL) {
578 for (i=0;i<public_ips->num;i++) {
579 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
580 /* yes, this node can serve this public ip */
589 /* search the node list for a node to take over this ip.
590 pick the node that is currently serving the least number of ips
591 so that the ips get spread out evenly.
/* Chooses a node to host ip.
   nodemap - nodes to consider
   mask    - node flag bits; a node with any of them set is skipped
   ip      - the address that needs a host
   all_ips - merged address list, used to count how many addresses
             each candidate already has (node_ip_coverage)

   Nodes for which can_node_serve_ip() returns non-zero are skipped
   too.  A warning is logged when no node can take the address;
   ctdb_takeover_run() treats a non-zero result as failure. */
593 static int find_takeover_node(struct ctdb_context *ctdb,
594 struct ctdb_node_map *nodemap, uint32_t mask,
595 struct ctdb_public_ip_list *ip,
596 struct ctdb_public_ip_list *all_ips)
602 for (i=0;i<nodemap->num;i++) {
603 if (nodemap->nodes[i].flags & mask) {
604 /* This node is not healty and can not be used to serve
610 /* verify that this node can serve this ip */
611 if (can_node_serve_ip(ctdb, i, ip)) {
612 /* no it couldnt so skip to the next node */
616 num = node_ip_coverage(ctdb, i, all_ips);
617 /* was this the first node we checked ? */
629 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
630 ctdb_addr_to_str(&ip->addr)));
/* Adds ip to the merged list ip_list unless an entry with the same
   address (ctdb_same_ip) is already present.  A new entry is
   allocated on tmp_ctx, copies ip->pnn and ip->addr, and is linked in
   front of the existing list.  The result is assigned back to the
   list head by create_merged_ip_list(). */
639 struct ctdb_public_ip_list *
640 add_ip_to_merged_list(struct ctdb_context *ctdb,
642 struct ctdb_public_ip_list *ip_list,
643 struct ctdb_public_ip *ip)
645 struct ctdb_public_ip_list *tmp_ip;
647 /* do we already have this ip in our merged list ?*/
648 for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
650 /* we already have this public ip in the list */
651 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
656 /* this is a new public ip, we must add it to the list */
657 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
658 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
659 tmp_ip->pnn = ip->pnn;
660 tmp_ip->addr = ip->addr;
/* prepend: the new node becomes the head of the list */
661 tmp_ip->next = ip_list;
/* Builds one list of all public addresses known to the cluster by
   feeding every entry of every node's public_ips array through
   add_ip_to_merged_list().  Nodes flagged NODE_FLAGS_DELETED and
   nodes without a public_ips list get separate handling.  List nodes
   are allocated on tmp_ctx. */
666 struct ctdb_public_ip_list *
667 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
670 struct ctdb_public_ip_list *ip_list = NULL;
671 struct ctdb_all_public_ips *public_ips;
673 for (i=0;i<ctdb->num_nodes;i++) {
674 public_ips = ctdb->nodes[i]->public_ips;
676 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
680 /* there were no public ips for this node */
681 if (public_ips == NULL) {
685 for (j=0;j<public_ips->num;j++) {
686 ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
687 ip_list, &public_ips->ips[j]);
695 make any IP alias changes for public addresses that are necessary
/* Recomputes which node hosts each public address and sends the
   result out to the cluster.  Phases, in the order they appear below:

   1. Count nodes with neither NODE_FLAGS_INACTIVE nor
      NODE_FLAGS_DISABLED set.  If there is at least one, both flags
      exclude a node; otherwise only NODE_FLAGS_INACTIVE does, so
      disabled nodes serve as fallback.
   2. Build the merged address list with create_merged_ip_list().
   3. With the deterministic_public_ips tunable set to 1, reset every
      assignment to (list index % nodemap->num).
      NOTE(review): that modulo assumes nodemap->num is non-zero -
      confirm callers guarantee it.
   4. Re-examine addresses whose node is excluded by the mask or
      cannot serve them (can_node_serve_ip); the comments below
      describe marking them with pnn -1.
   5. For every address with pnn == -1, find_takeover_node() is asked
      for a host.
   6. The no_ip_failback tunable is checked; combining it with
      deterministic_public_ips is logged as an error.
   7. Rebalancing: for each address the highest and lowest coverage
      among eligible nodes is computed; when maxnum > minnum+1 one
      address of the busiest node is picked for reassignment.
   8. Every node without NODE_FLAGS_DISCONNECTED is sent
      CTDB_CONTROL_RELEASE_IPv4 (AF_INET addresses) or
      CTDB_CONTROL_RELEASE_IP for each address not assigned to it, and
      the replies are awaited with ctdb_client_async_wait().
   9. Each address with an owner is announced to that owner with
      CTDB_CONTROL_TAKEOVER_IPv4 / CTDB_CONTROL_TAKEOVER_IP, again
      awaiting the replies.

   Temporary allocations hang off tmp_ctx, which is freed on the error
   paths shown and at the end. */
697 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
699 int i, num_healthy, retries;
700 struct ctdb_public_ip ip;
701 struct ctdb_public_ipv4 ipv4;
703 struct ctdb_public_ip_list *all_ips, *tmp_ip;
704 int maxnode, maxnum=0, minnode, minnum=0, num;
706 struct timeval timeout;
707 struct client_async_data *async_data;
708 struct ctdb_client_control_state *state;
709 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
714 /* Count how many completely healthy nodes we have */
716 for (i=0;i<nodemap->num;i++) {
717 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
722 if (num_healthy > 0) {
723 /* We have healthy nodes, so only consider them for
724 serving public addresses
726 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
728 /* We didnt have any completely healthy nodes so
729 use "disabled" nodes as a fallback
731 mask = NODE_FLAGS_INACTIVE;
734 /* since nodes only know about those public addresses that
735 can be served by that particular node, no single node has
736 a full list of all public addresses that exist in the cluster.
737 Walk over all node structures and create a merged list of
738 all public addresses that exist in the cluster.
740 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
742 /* If we want deterministic ip allocations, i.e. that the ip addresses
743 will always be allocated the same way for a specific set of
744 available/unavailable nodes.
746 if (1 == ctdb->tunable.deterministic_public_ips) {
747 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
748 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
749 tmp_ip->pnn = i%nodemap->num;
754 /* mark all public addresses with a masked node as being served by
757 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
758 if (tmp_ip->pnn == -1) {
761 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
766 /* verify that the assigned nodes can serve that public ip
767 and set it to -1 if not
769 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
770 if (tmp_ip->pnn == -1) {
773 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
774 /* this node can not serve this ip. */
780 /* now we must redistribute all public addresses with takeover node
781 -1 among the nodes available
785 /* loop over all ip's and find a physical node to cover for
788 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
789 if (tmp_ip->pnn == -1) {
790 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
791 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
792 ctdb_addr_to_str(&tmp_ip->addr)));
797 /* If we dont want ips to fail back after a node becomes healthy
798 again, we wont even try to reallocat the ip addresses so that
799 they are evenly spread out.
800 This can NOT be used at the same time as DeterministicIPs !
802 if (1 == ctdb->tunable.no_ip_failback) {
803 if (1 == ctdb->tunable.deterministic_public_ips) {
804 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
810 /* now, try to make sure the ip adresses are evenly distributed
812 for each ip address, loop over all nodes that can serve this
813 ip and make sure that the difference between the node
814 serving the most and the node serving the least ip's are not greater
817 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
818 if (tmp_ip->pnn == -1) {
822 /* Get the highest and lowest number of ips's served by any
823 valid node which can serve this ip.
827 for (i=0;i<nodemap->num;i++) {
828 if (nodemap->nodes[i].flags & mask) {
832 /* only check nodes that can actually serve this ip */
833 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
834 /* no it couldnt so skip to the next node */
838 num = node_ip_coverage(ctdb, i, all_ips);
859 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
860 ctdb_addr_to_str(&tmp_ip->addr)));
865 /* If we want deterministic IPs then dont try to reallocate
866 them to spread out the load.
868 if (1 == ctdb->tunable.deterministic_public_ips) {
872 /* if the spread between the smallest and largest coverage by
873 a node is >=2 we steal one of the ips from the node with
874 most coverage to even things out a bit.
875 try to do this at most 5 times since we dont want to spend
876 too much time balancing the ip coverage.
878 if ( (maxnum > minnum+1)
880 struct ctdb_public_ip_list *tmp;
882 /* mark one of maxnode's vnn's as unassigned and try
885 for (tmp=all_ips;tmp;tmp=tmp->next) {
886 if (tmp->pnn == maxnode) {
896 /* finished distributing the public addresses, now just send the
897 info out to the nodes
901 /* at this point ->pnn is the node which will own each IP
902 or -1 if there is no node that can cover this ip
905 /* now tell all nodes to delete any alias that they should not
906 have. This will be a NOOP on nodes that don't currently
907 hold the given alias */
908 async_data = talloc_zero(tmp_ctx, struct client_async_data);
909 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
911 for (i=0;i<nodemap->num;i++) {
912 /* don't talk to unconnected nodes, but do talk to banned nodes */
913 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
917 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
918 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
919 /* This node should be serving this
920 vnn so dont tell it to release the ip
924 if (tmp_ip->addr.sa.sa_family == AF_INET) {
925 ipv4.pnn = tmp_ip->pnn;
926 ipv4.sin = tmp_ip->addr.ip;
928 timeout = TAKEOVER_TIMEOUT();
929 data.dsize = sizeof(ipv4);
930 data.dptr = (uint8_t *)&ipv4;
931 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
932 0, CTDB_CONTROL_RELEASE_IPv4, 0,
936 ip.pnn = tmp_ip->pnn;
937 ip.addr = tmp_ip->addr;
939 timeout = TAKEOVER_TIMEOUT();
940 data.dsize = sizeof(ip);
941 data.dptr = (uint8_t *)&ip;
942 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
943 0, CTDB_CONTROL_RELEASE_IP, 0,
949 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
950 talloc_free(tmp_ctx);
954 ctdb_client_async_add(async_data, state);
957 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
958 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
959 talloc_free(tmp_ctx);
962 talloc_free(async_data);
965 /* tell all nodes to get their own IPs */
966 async_data = talloc_zero(tmp_ctx, struct client_async_data);
967 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
968 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
969 if (tmp_ip->pnn == -1) {
970 /* this IP won't be taken over */
974 if (tmp_ip->addr.sa.sa_family == AF_INET) {
975 ipv4.pnn = tmp_ip->pnn;
976 ipv4.sin = tmp_ip->addr.ip;
978 timeout = TAKEOVER_TIMEOUT();
979 data.dsize = sizeof(ipv4);
980 data.dptr = (uint8_t *)&ipv4;
981 state = ctdb_control_send(ctdb, tmp_ip->pnn,
982 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
986 ip.pnn = tmp_ip->pnn;
987 ip.addr = tmp_ip->addr;
989 timeout = TAKEOVER_TIMEOUT();
990 data.dsize = sizeof(ip);
991 data.dptr = (uint8_t *)&ip;
992 state = ctdb_control_send(ctdb, tmp_ip->pnn,
993 0, CTDB_CONTROL_TAKEOVER_IP, 0,
998 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
999 talloc_free(tmp_ctx);
1003 ctdb_client_async_add(async_data, state);
1005 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1006 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1007 talloc_free(tmp_ctx);
1011 talloc_free(tmp_ctx);
1017 destroy a ctdb_client_ip structure
/* talloc destructor installed on each struct ctdb_client_ip by
   ctdb_control_tcp_client().  Logs the entry and unlinks it from
   ctdb->client_ip_list. */
1019 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1021 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1022 ctdb_addr_to_str(&ip->addr),
1023 ntohs(ip->addr.ip.sin_port),
1026 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1031 called by a client to inform us of a TCP connection that it is managing
1032 and that should be tickled with an ACK when IP takeover is done.
1033 we handle both the old ipv4 style of packets as well as the new ipv4/6
/* Control handler: a local client registers a TCP connection.

   The payload format is selected by indata.dsize: the old
   struct ctdb_control_tcp is converted into a zeroed local
   struct ctdb_control_tcp_addr, while a struct ctdb_control_tcp_addr
   is used in place.  Any other size is logged as an error.

   Both endpoints are canonicalised, the destination is looked up as a
   public address, and registration is refused when
   vnn->pnn != ctdb->pnn.  On success a ctdb_client_ip entry (with a
   destructor) and a ctdb_tcp_list entry are allocated as children of
   the client and linked into ctdb->client_ip_list and
   client->tcp_list.  The connection is then broadcast to all
   connected nodes as CTDB_CONTROL_TCP_ADD with
   CTDB_CTRL_FLAG_NOREPLY. */
1036 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
/* NOTE(review): client is dereferenced further down and used as a
   talloc parent; no NULL check is visible - confirm the lookup
   cannot fail for a live client_id. */
1039 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1040 struct ctdb_control_tcp *old_addr = NULL;
1041 struct ctdb_control_tcp_addr new_addr;
1042 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1043 struct ctdb_tcp_list *tcp;
1044 struct ctdb_control_tcp_vnn t;
1047 struct ctdb_client_ip *ip;
1048 struct ctdb_vnn *vnn;
1049 ctdb_sock_addr addr;
1051 switch (indata.dsize) {
1052 case sizeof(struct ctdb_control_tcp):
1053 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1054 ZERO_STRUCT(new_addr);
1055 tcp_sock = &new_addr;
1056 tcp_sock->src.ip = old_addr->src;
1057 tcp_sock->dest.ip = old_addr->dest;
1059 case sizeof(struct ctdb_control_tcp_addr):
1060 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
/* NOTE(review): sizeof yields size_t but is printed with %lu below;
   %zu would match the type on every platform. */
1063 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
/* canonicalise both endpoints, going through a temporary copy so the
   source and destination of the conversion do not alias; for the
   new-style payload this rewrites the caller's buffer in place */
1067 addr = tcp_sock->src;
1068 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1069 addr = tcp_sock->dest;
1070 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1073 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1074 vnn = find_public_ip_vnn(ctdb, &addr);
1076 switch (addr.sa.sa_family) {
/* loopback destinations are not reported as an error */
1078 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1079 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1080 ctdb_addr_to_str(&addr)));
1084 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1085 ctdb_addr_to_str(&addr)));
1088 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1094 if (vnn->pnn != ctdb->pnn) {
1095 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1096 ctdb_addr_to_str(&addr),
1097 client_id, client->pid));
1098 /* failing this call will tell smbd to die */
/* child of client: released, and unlinked by its destructor, when the
   client goes away */
1102 ip = talloc(client, struct ctdb_client_ip);
1103 CTDB_NO_MEMORY(ctdb, ip);
1107 ip->client_id = client_id;
1108 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1109 DLIST_ADD(ctdb->client_ip_list, ip);
1111 tcp = talloc(client, struct ctdb_tcp_list);
1112 CTDB_NO_MEMORY(ctdb, tcp);
1114 tcp->connection.src_addr = tcp_sock->src;
1115 tcp->connection.dst_addr = tcp_sock->dest;
1117 DLIST_ADD(client->tcp_list, tcp);
1119 t.src = tcp_sock->src;
1120 t.dest = tcp_sock->dest;
1122 data.dptr = (uint8_t *)&t;
1123 data.dsize = sizeof(t);
1125 switch (addr.sa.sa_family) {
1127 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1128 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1129 ctdb_addr_to_str(&tcp_sock->src),
1130 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1133 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1134 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1135 ctdb_addr_to_str(&tcp_sock->src),
1136 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1139 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1143 /* tell all nodes about this tcp connection */
1144 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1145 CTDB_CONTROL_TCP_ADD,
1146 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1148 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1156 find a tcp address on a list
/* Linear search of array for a connection whose source and
   destination both match tcp (ctdb_same_sockaddr on each).  Returns a
   pointer to the matching element inside the array.  A NULL array is
   checked for up front; callers compare the result against NULL. */
1158 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1159 struct ctdb_tcp_connection *tcp)
1163 if (array == NULL) {
1167 for (i=0;i<array->num;i++) {
1168 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1169 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1170 return &array->connections[i];
1177 called by a daemon to inform us of a TCP connection that one of its
1178 clients is managing and that should be tickled with an ACK when IP takeover is
/* Control handler for CTDB_CONTROL_TCP_ADD: records the connection
   described by indata (a struct ctdb_control_tcp_vnn) in the
   tcp_array of the vnn that owns the destination address.

   A destination that is not a public address is logged at
   DEBUG_INFO.  The first connection for a vnn creates the array;
   later ones are appended with talloc_realloc() unless
   ctdb_tcp_find() shows the connection is already recorded. */
1181 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1183 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1184 struct ctdb_tcp_array *tcparray;
1185 struct ctdb_tcp_connection tcp;
1186 struct ctdb_vnn *vnn;
1188 vnn = find_public_ip_vnn(ctdb, &p->dest);
1190 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1191 ctdb_addr_to_str(&p->dest)));
1197 tcparray = vnn->tcp_array;
1199 /* If this is the first tickle */
1200 if (tcparray == NULL) {
/* the array header is parented to ctdb->nodes, not to the vnn */
1201 tcparray = talloc_size(ctdb->nodes,
1202 offsetof(struct ctdb_tcp_array, connections) +
1203 sizeof(struct ctdb_tcp_connection) * 1);
1204 CTDB_NO_MEMORY(ctdb, tcparray);
1205 vnn->tcp_array = tcparray;
1208 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1209 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1211 tcparray->connections[tcparray->num].src_addr = p->src;
1212 tcparray->connections[tcparray->num].dst_addr = p->dest;
1218 /* Do we already have this tickle ?*/
1219 tcp.src_addr = p->src;
1220 tcp.dst_addr = p->dest;
1221 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1222 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1223 ctdb_addr_to_str(&tcp.dst_addr),
1224 ntohs(tcp.dst_addr.ip.sin_port),
1229 /* A new tickle, we must add it to the array */
/* NOTE(review): the realloc result overwrites tcparray->connections
   directly; if it fails, the pointer to the old block is lost while
   the element count presumably stays unchanged - confirm how
   CTDB_NO_MEMORY leaves the array in that case. */
1230 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1231 struct ctdb_tcp_connection,
1233 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1235 vnn->tcp_array = tcparray;
1236 tcparray->connections[tcparray->num].src_addr = p->src;
1237 tcparray->connections[tcparray->num].dst_addr = p->dest;
1240 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1241 ctdb_addr_to_str(&tcp.dst_addr),
1242 ntohs(tcp.dst_addr.ip.sin_port),
1250 remove a TCP connection that one of our clients was managing from the
1251 list of connections that should be tickled with an ACK when IP takeover is
/* Removes conn from the tcp_array of the vnn that owns
   conn->dst_addr.

   An unknown destination address is logged at DEBUG_ERR.  An empty
   array or a connection that is not recorded is logged at DEBUG_INFO.
   Otherwise the matching slot is overwritten with the last element
   and the count decremented, so element order is not preserved.  The
   array is freed and set to NULL once it becomes empty, and
   vnn->tcp_update_needed is set. */
1254 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1256 struct ctdb_tcp_connection *tcpp;
1257 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1260 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1261 ctdb_addr_to_str(&conn->dst_addr)));
1265 /* if the array is empty we cant remove it
1266 and we dont need to do anything
1268 if (vnn->tcp_array == NULL) {
1269 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1270 ctdb_addr_to_str(&conn->dst_addr),
1271 ntohs(conn->dst_addr.ip.sin_port)));
1276 /* See if we know this connection
1277 if we dont know this connection then we dont need to do anything
1279 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1281 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1282 ctdb_addr_to_str(&conn->dst_addr),
1283 ntohs(conn->dst_addr.ip.sin_port)));
1288 /* We need to remove this entry from the array.
1289 Instead of allocating a new array and copying data to it
1290 we cheat and just copy the last entry in the existing array
1291 to the entry that is to be removed and just shring the
1294 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1295 vnn->tcp_array->num--;
1297 /* If we deleted the last entry we also need to remove the entire array
1299 if (vnn->tcp_array->num == 0) {
1300 talloc_free(vnn->tcp_array);
1301 vnn->tcp_array = NULL;
1304 vnn->tcp_update_needed = true;
1306 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1307 ctdb_addr_to_str(&conn->src_addr),
1308 ntohs(conn->src_addr.ip.sin_port)));
1313 called when a daemon restarts - send all tickles for all public addresses
1314 we are serving immediately to the new node.
/* Control handler invoked with the vnn number of a (re)started node.
   As the XXX marker below records, forwarding our tickle lists to the
   new node is still to be implemented. */
1316 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1318 /*XXX here we should send all tickles we are serving to the new node */
1324 called when a client structure goes away - hook to remove
1325 elements from the tcp_list in all daemons
/* Drains client->tcp_list: each entry is unlinked from the list and
   its connection handed to ctdb_remove_tcp_connection(), which drops
   it from the owning vnn's tickle array. */
1327 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1329 while (client->tcp_list) {
1330 struct ctdb_tcp_list *tcp = client->tcp_list;
1331 DLIST_REMOVE(client->tcp_list, tcp);
1332 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1338 release all IPs on shutdown
/* Walks every vnn, consulting ctdb_sys_have_ip() and comparing
   vnn->pnn with our own pnn, then runs the "releaseip" event script
   synchronously with ctdb_event_script() and kills the clients
   registered on that address. */
1340 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1342 struct ctdb_vnn *vnn;
1344 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1345 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1348 if (vnn->pnn == ctdb->pnn) {
1351 ctdb_event_script(ctdb, "releaseip %s %s %u",
/* NOTE(review): this string is allocated on ctdb and no free is
   visible; presumably acceptable because this runs at shutdown -
   confirm. */
1353 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1354 vnn->public_netmask_bits);
1355 release_kill_clients(ctdb, &vnn->public_address);
1361 get list of public IPs
/* Control handler: returns this node's public address list in
   outdata as a struct ctdb_all_public_ips.

   The vnn list is walked once to size the reply and once to fill it.
   The buffer is zero-initialised, allocated as a talloc child of
   outdata, and sized as the fixed header (offsetof ... ips) plus one
   struct ctdb_public_ip per counted entry.  Each entry carries the
   vnn's pnn and public_address. */
1363 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1364 struct ctdb_req_control *c, TDB_DATA *outdata)
1367 struct ctdb_all_public_ips *ips;
1368 struct ctdb_vnn *vnn;
1370 /* count how many public ip structures we have */
1372 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1376 len = offsetof(struct ctdb_all_public_ips, ips) +
1377 num*sizeof(struct ctdb_public_ip);
1378 ips = talloc_zero_size(outdata, len);
1379 CTDB_NO_MEMORY(ctdb, ips);
1381 outdata->dsize = len;
1382 outdata->dptr = (uint8_t *)ips;
1386 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1387 ips->ips[i].pnn = vnn->pnn;
1388 ips->ips[i].addr = vnn->public_address;
1397 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  IPv4-only variant of the public-ip listing: builds a
  struct ctdb_all_public_ipsv4 reply in outdata.
    ctdb    - daemon context; ctdb->vnn is the list that is walked
    c       - request header (not referenced in the visible code)
    outdata - receives dptr/dsize of the reply; the buffer is a talloc
              child of outdata
  Both loops test the address family against AF_INET.
  NOTE(review): presumably non-AF_INET entries are skipped in both passes -
  confirm.
*/
1399 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1400 struct ctdb_req_control *c, TDB_DATA *outdata)
1403 struct ctdb_all_public_ipsv4 *ips;
1404 struct ctdb_vnn *vnn;
1406 /* count how many public ip structures we have */
1408 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1409 if (vnn->public_address.sa.sa_family != AF_INET) {
/* fixed header plus one ctdb_public_ipv4 per counted address */
1415 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1416 num*sizeof(struct ctdb_public_ipv4);
1417 ips = talloc_zero_size(outdata, len);
1418 CTDB_NO_MEMORY(ctdb, ips);
1420 outdata->dsize = len;
1421 outdata->dptr = (uint8_t *)ips;
/* second pass: fill in the entries */
1425 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1426 if (vnn->public_address.sa.sa_family != AF_INET) {
1429 ips->ips[i].pnn = vnn->pnn;
/* only the sockaddr_in member of the address union is copied */
1430 ips->ips[i].sin = vnn->public_address.ip;
1439 structure containing the listening socket and the list of tcp connections
1440 that the ctdb daemon is to kill
/* Per-vnn state used while TCP connections are being reset. */
1442 struct ctdb_kill_tcp {
/* owning public address; its ->killtcp pointer is cleared by the destructor */
1443 struct ctdb_vnn *vnn;
/* daemon context; its event context is used to re-arm the tickle timer */
1444 struct ctdb_context *ctdb;
/* read event registered on the capture socket */
1446 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
1447 trbt_tree_t *connections;
1452 a tcp connection that is to be killed
/* One entry in ctdb_kill_tcp.connections. */
1454 struct ctdb_killtcp_con {
/* the two endpoints of the connection, stored after canonicalization */
1455 ctdb_sock_addr src_addr;
1456 ctdb_sock_addr dst_addr;
/* back pointer to the owning kill state (also the talloc parent) */
1458 struct ctdb_kill_tcp *killtcp;
1461 /* this function is used to create a key to represent this socketpair
1462 in the killtcp tree.
1463 this key is used to insert and lookup matching socketpairs that are
1464 to be tickled and RST
/* number of uint32_t words in a killtcp tree key */
1466 #define KILLTCP_KEYLEN 10
/*
  Fills a KILLTCP_KEYLEN-word key from the two endpoints.
  The key lives in a function-static buffer that is zeroed on every call, so
  a previously built key is overwritten by the next call.
  NOTE(review): presumably a pointer to that static buffer is returned -
  callers must use it before calling again; not reentrant.
  Both addresses must have the same family; a mismatch or an unknown family
  is logged at DEBUG_ERR.
  The first switch case fills key[0..3] from the .ip member (address and
  port of dst and src); the second fills key[0..9] from the .ip6 member
  (the four address words of each endpoint interleaved, then the ports).
*/
1467 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1469 static uint32_t key[KILLTCP_KEYLEN];
/* clear first so words not set by the chosen case are zero */
1471 bzero(key, sizeof(key));
1473 if (src->sa.sa_family != dst->sa.sa_family) {
1474 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1478 switch (src->sa.sa_family) {
1480 key[0] = dst->ip.sin_addr.s_addr;
1481 key[1] = src->ip.sin_addr.s_addr;
1482 key[2] = dst->ip.sin_port;
1483 key[3] = src->ip.sin_port;
1486 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1487 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1488 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1489 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1490 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1491 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1492 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1493 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1494 key[8] = dst->ip6.sin6_port;
1495 key[9] = src->ip6.sin6_port;
1498 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1506 called when we get a read event on the raw socket
/*
  fd-event callback for the capture socket.
    private_data - the struct ctdb_kill_tcp the socket belongs to
    flags        - only EVENT_FD_READ is acted upon
  Reads one packet with ctdb_sys_read_tcp_packet(), looks the (src, dst)
  pair up in killtcp->connections and, on a match, answers with
  ctdb_sys_send_tcp() using the captured sequence numbers.
  NOTE(review): the final argument 1 presumably requests an RST, as the log
  message suggests - confirm against ctdb_sys_send_tcp().
*/
1508 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1509 uint16_t flags, void *private_data)
1511 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1512 struct ctdb_killtcp_con *con;
1513 ctdb_sock_addr src, dst;
1514 uint32_t ack_seq, seq;
1516 if (!(flags & EVENT_FD_READ)) {
1520 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1521 killtcp->private_data,
1523 &ack_seq, &seq) != 0) {
1524 /* probably a non-tcp ACK packet */
1528 /* check if we have this guy in our list of connections
/* key is built from the captured packet's own src/dst */
1531 con = trbt_lookuparray32(killtcp->connections,
1532 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1534 /* no this was some other packet we can just ignore */
1538 /* This one has been tickled !
1539 now reset him and remove him from the list.
1541 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1542 ntohs(con->dst_addr.ip.sin_port),
1543 ctdb_addr_to_str(&con->src_addr),
1544 ntohs(con->src_addr.ip.sin_port)));
1546 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1551 /* when traversing the list of all tcp connections to send tickle acks to
1552 (so that we can capture the ack coming back and kill the connection
1554 this callback is called for each connection we are currently trying to kill
/*
  Per-node callback for trbt_traversearray32() over the killtcp tree.
    param - unused in the visible code (the caller passes NULL)
    data  - the struct ctdb_killtcp_con stored in the node
  Once con->count has reached 5 the connection is given up on; below that
  another packet is sent with dst_addr and src_addr as the endpoints.
*/
1556 static void tickle_connection_traverse(void *param, void *data)
1558 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1560 /* have tried too many times, just give up */
1561 if (con->count >= 5) {
1566 /* otherwise, try tickling it again */
1569 (ctdb_sock_addr *)&con->dst_addr,
1570 (ctdb_sock_addr *)&con->src_addr,
1576 called every second until all sentenced connections have been reset
/*
  Timed-event callback that drives the kill-tcp retry loop.
    private_data - the struct ctdb_kill_tcp being serviced
  Runs tickle_connection_traverse() over every tracked connection. If the
  tree is missing or has no root afterwards, the whole killtcp structure is
  freed; if connections remain, the callback re-arms itself one second
  ahead, with killtcp as the talloc parent of the timer.
*/
1578 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1579 struct timeval t, void *private_data)
1581 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1584 /* loop over all connections sending tickle ACKs */
1585 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1588 /* If there are no more connections to kill we can remove the
1589 entire killtcp structure
1591 if ( (killtcp->connections == NULL) ||
1592 (killtcp->connections->root == NULL) ) {
1593 talloc_free(killtcp);
1597 /* try tickling them again in a seconds time
1599 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1600 ctdb_tickle_sentenced_connections, killtcp);
1604 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp (installed with
  talloc_set_destructor() in ctdb_killtcp_add_connection()).
  Clears the owning vnn's back pointer so it does not dangle after the free.
*/
1606 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1608 killtcp->vnn->killtcp = NULL;
1613 /* nothing fancy here, just unconditionally replace any existing
1614 connection structure with the new one.
1616 dont even free the old one if it did exist, that one is talloc_stolen
1617 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback passed to trbt_insertarray32_callback() when a connection
  is added to the killtcp tree.
    parm - the new struct ctdb_killtcp_con supplied by the caller
    data - NOTE(review): presumably the value already stored under the same
           key, if any - confirm against the rb_tree API.
*/
1620 static void *add_killtcp_callback(void *parm, void *data)
1626 add a tcp socket to the list of connections we want to RST
/*
  Registers one TCP connection (endpoints s and d) to be reset.
  Steps visible in this function:
    - canonicalize both endpoints
    - find the vnn owning the connection, trying dst, then src, then the
      'single ip' vnn; log an error if none matches
    - on first use for that vnn, allocate the ctdb_kill_tcp state (talloc
      child of ctdb) with its connection tree and destructor
    - allocate a ctdb_killtcp_con under killtcp and insert it into the tree
    - open the capture socket if there is none yet
    - register the read event and the one-second tickle timer if no fd
      event exists yet
*/
1628 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1632 ctdb_sock_addr src, dst;
1633 struct ctdb_kill_tcp *killtcp;
1634 struct ctdb_killtcp_con *con;
1635 struct ctdb_vnn *vnn;
1637 ctdb_canonicalize_ip(s, &src);
1638 ctdb_canonicalize_ip(d, &dst);
1640 vnn = find_public_ip_vnn(ctdb, &dst);
/* NOTE(review): presumably only tried when the dst lookup found nothing - confirm */
1642 vnn = find_public_ip_vnn(ctdb, &src);
1645 /* if it is not a public ip it could be our 'single ip' */
1646 if (ctdb->single_ip_vnn) {
1647 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1648 vnn = ctdb->single_ip_vnn;
1653 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1657 killtcp = vnn->killtcp;
1659 /* If this is the first connection to kill we must allocate
1662 if (killtcp == NULL) {
1663 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1664 CTDB_NO_MEMORY(ctdb, killtcp);
1667 killtcp->ctdb = ctdb;
/* -1 marks "capture socket not opened yet" (tested below) */
1668 killtcp->capture_fd = -1;
1669 killtcp->connections = trbt_create(killtcp, 0);
1671 vnn->killtcp = killtcp;
1672 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1677 /* create a structure that describes this connection we want to
1678 RST and store it in killtcp->connections
1680 con = talloc(killtcp, struct ctdb_killtcp_con);
1681 CTDB_NO_MEMORY(ctdb, con);
1682 con->src_addr = src;
1683 con->dst_addr = dst;
1685 con->killtcp = killtcp;
/* NOTE(review): the key is built as (dst, src) here while capture_tcp_handler()
   looks up (src, dst) of the captured packet; presumably intentional because
   captured packets travel in the opposite direction - confirm */
1688 trbt_insertarray32_callback(killtcp->connections,
1689 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1690 add_killtcp_callback, con);
1693 If we dont have a socket to listen on yet we must create it
1695 if (killtcp->capture_fd == -1) {
1696 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1697 if (killtcp->capture_fd == -1) {
1698 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1704 if (killtcp->fde == NULL) {
/* the fd event is a talloc child of killtcp and is created with EVENT_FD_AUTOCLOSE */
1705 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1706 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1707 capture_tcp_handler, killtcp);
1709 /* We also need to set up some events to tickle all these connections
1710 until they are all reset
1712 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1713 ctdb_tickle_sentenced_connections, killtcp);
1716 /* tickle him once now */
/* NOTE(review): presumably the failure path - drops the whole kill state for this vnn */
1725 talloc_free(vnn->killtcp);
1726 vnn->killtcp = NULL;
1731 kill a TCP connection.
/*
  Control entry point: interprets indata as a struct ctdb_control_killtcp
  and forwards its two endpoints to ctdb_killtcp_add_connection(), whose
  result is returned unchanged.
  NOTE(review): no check of indata.dsize is visible before indata.dptr is
  dereferenced - confirm the size is validated by the caller.
*/
1733 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1735 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1737 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1741 called by a daemon to inform us of the entire list of TCP tickles for
1742 a particular public address.
1743 this control should only be sent by the node that is currently serving
1744 that public address.
/*
  Replaces the stored tickle list of one public address with the list
  carried in indata (a struct ctdb_control_tcp_tickle_list).
  The blob is size-checked in two steps: first that the fixed header is
  present, then that it is large enough for tickles.num connections.
  The old vnn->tcp_array is freed and a fresh copy of the connections is
  attached to the vnn.
*/
1746 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1748 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1749 struct ctdb_tcp_array *tcparray;
1750 struct ctdb_vnn *vnn;
1752 /* We must at least have tickles.num or else we cant verify the size
1753 of the received data blob
1755 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1756 tickles.connections)) {
1757 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1761 /* verify that the size of data matches what we expect */
/* NOTE(review): tickles.num comes from the received blob; the multiplication
   below could wrap for very large values where size_t is 32 bits - confirm */
1762 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1763 tickles.connections)
1764 + sizeof(struct ctdb_tcp_connection)
1765 * list->tickles.num) {
1766 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1770 vnn = find_public_ip_vnn(ctdb, &list->addr);
1772 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1773 ctdb_addr_to_str(&list->addr)));
1778 /* remove any old ticklelist we might have */
1779 talloc_free(vnn->tcp_array);
1780 vnn->tcp_array = NULL;
/* allocated under ctdb->nodes first; re-parented to the vnn at the end */
1782 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1783 CTDB_NO_MEMORY(ctdb, tcparray);
1785 tcparray->num = list->tickles.num;
1787 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1788 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1790 memcpy(tcparray->connections, &list->tickles.connections[0],
1791 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1793 /* We now have a new fresh tickle list array for this vnn */
1794 vnn->tcp_array = talloc_steal(vnn, tcparray);
1800 called to return the full list of tickles for the public address associated
1801 with the provided vnn
/*
  Returns the stored tickle list for one public address.
    indata  - interpreted as a ctdb_sock_addr naming the public address
    outdata - receives a struct ctdb_control_tcp_tickle_list; the buffer is
              a talloc child of outdata
  Logs an error when the address is not a known public address.
  NOTE(review): no check of indata.dsize is visible before indata.dptr is
  used as a ctdb_sock_addr - confirm the size is validated elsewhere.
*/
1803 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1805 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1806 struct ctdb_control_tcp_tickle_list *list;
1807 struct ctdb_tcp_array *tcparray;
1809 struct ctdb_vnn *vnn;
1811 vnn = find_public_ip_vnn(ctdb, addr);
1813 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1814 ctdb_addr_to_str(addr)));
1819 tcparray = vnn->tcp_array;
1821 num = tcparray->num;
/* reply = fixed header + num connection records */
1826 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1827 tickles.connections)
1828 + sizeof(struct ctdb_tcp_connection) * num;
1830 outdata->dptr = talloc_size(outdata, outdata->dsize);
1831 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1832 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1835 list->tickles.num = num;
1837 memcpy(&list->tickles.connections[0], tcparray->connections,
1838 sizeof(struct ctdb_tcp_connection) * num);
1846 set the list of all tcp tickles for a public address
/*
  Marshals tcparray into a struct ctdb_control_tcp_tickle_list and sends it
  as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control with
  CTDB_CTRL_FLAG_NOREPLY. The temporary buffer is allocated on ctdb and
  freed after the send.
  NOTE(review): in the visible code the control always goes to
  CTDB_BROADCAST_CONNECTED, and the timeout and destnode parameters are not
  referenced - confirm this is intended.
*/
1848 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1849 struct timeval timeout, uint32_t destnode,
1850 ctdb_sock_addr *addr,
1851 struct ctdb_tcp_array *tcparray)
1855 struct ctdb_control_tcp_tickle_list *list;
1858 num = tcparray->num;
/* payload = fixed header + num connection records */
1863 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1864 tickles.connections) +
1865 sizeof(struct ctdb_tcp_connection) * num;
1866 data.dptr = talloc_size(ctdb, data.dsize);
1867 CTDB_NO_MEMORY(ctdb, data.dptr);
1869 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1871 list->tickles.num = num;
1873 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1876 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1877 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1878 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1880 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1884 talloc_free(data.dptr);
1891 perform tickle updates if required
/*
  Periodic timed-event callback.
    private_data - the struct ctdb_context
  Walks ctdb->vnn, testing each address's pnn against ctdb->pnn and its
  tcp_update_needed flag, and pushes tickle lists with
  ctdb_ctrl_set_tcp_tickles(); a failed push is logged at DEBUG_ERR.
  Afterwards the callback re-arms itself on ctdb->tickle_update_context,
  tunable.tickle_update_interval ahead.
*/
1893 static void ctdb_update_tcp_tickles(struct event_context *ev,
1894 struct timed_event *te,
1895 struct timeval t, void *private_data)
1897 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1899 struct ctdb_vnn *vnn;
1901 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1902 /* we only send out updates for public addresses that
1905 if (ctdb->pnn != vnn->pnn) {
1908 /* We only send out the updates if we need to */
1909 if (!vnn->tcp_update_needed) {
1912 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1914 CTDB_BROADCAST_CONNECTED,
1915 &vnn->public_address,
1918 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1919 ctdb_addr_to_str(&vnn->public_address)));
/* schedule the next run; the timer is owned by tickle_update_context */
1923 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1924 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1925 ctdb_update_tcp_tickles, ctdb);
1930 start periodic update of tcp tickles
/*
  Creates ctdb->tickle_update_context (a talloc child of ctdb) and schedules
  the first ctdb_update_tcp_tickles() run tunable.tickle_update_interval
  ahead. The timer is parented to that context.
*/
1932 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1934 ctdb->tickle_update_context = talloc_new(ctdb);
1936 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1937 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1938 ctdb_update_tcp_tickles, ctdb);
/* State carried between repeated gratuitous ARP sends requested by a control. */
1944 struct control_gratious_arp {
/* daemon context; its event context is used to re-arm the send timer */
1945 struct ctdb_context *ctdb;
/* address passed to ctdb_sys_send_arp() */
1946 ctdb_sock_addr addr;
1952 send a control_gratuitous arp
/*
  Timed-event callback that sends one gratuitous ARP.
    private_data - the struct control_gratious_arp describing what to send
  A failed ctdb_sys_send_arp() is logged with strerror(errno). arp->count
  is compared with CTDB_ARP_REPEAT, and the callback re-arms itself
  CTDB_ARP_INTERVAL ahead with arp as the talloc parent of the timer.
  NOTE(review): presumably the sequence stops and arp is released once the
  repeat count is reached - confirm.
*/
1954 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1955 struct timeval t, void *private_data)
1958 struct control_gratious_arp *arp = talloc_get_type(private_data,
1959 struct control_gratious_arp);
1961 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1963 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1968 if (arp->count == CTDB_ARP_REPEAT) {
1973 event_add_timed(arp->ctdb->ev, arp,
1974 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
1975 send_gratious_arp, arp);
/*
  Control entry point that starts a series of gratuitous ARPs.
    indata - a struct ctdb_control_gratious_arp: fixed header followed by
             the interface name (gratious_arp->len bytes)
  The blob is size-checked against the fixed header and against
  header + len. The address and a copy of the interface name are then
  stored in a control_gratious_arp allocated on ctdb, and
  send_gratious_arp() is scheduled to run immediately (timeval_zero()).
  NOTE(review): the interface name is copied with talloc_strdup(); that it
  is NUL-terminated within len bytes is not checked in the visible code.
*/
1982 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1984 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1985 struct control_gratious_arp *arp;
1987 /* verify the size of indata */
1988 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1989 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
1990 (unsigned)indata.dsize,
1991 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1995 ( offsetof(struct ctdb_control_gratious_arp, iface)
1996 + gratious_arp->len ) ){
1998 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1999 "but should be %u bytes\n",
2000 (unsigned)indata.dsize,
2001 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2006 arp = talloc(ctdb, struct control_gratious_arp);
2007 CTDB_NO_MEMORY(ctdb, arp);
2010 arp->addr = gratious_arp->addr;
/* the copy is owned by arp, so it goes away with it */
2011 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2012 CTDB_NO_MEMORY(ctdb, arp->iface);
2015 event_add_timed(arp->ctdb->ev, arp,
2016 timeval_zero(), send_gratious_arp, arp);
/*
  Control entry point that adds a public address.
    indata - a struct ctdb_control_ip_iface: fixed header followed by the
             interface name (pub->len bytes)
  After the size checks on indata, the address, mask and interface name are
  forwarded to ctdb_add_public_address(), whose result is returned.
*/
2021 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2023 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2026 /* verify the size of indata */
2027 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2028 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2032 ( offsetof(struct ctdb_control_ip_iface, iface)
2035 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2036 "but should be %u bytes\n",
2037 (unsigned)indata.dsize,
2038 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2042 return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2046 called when releaseip event finishes for del_public_address
/*
  Completion callback for the "releaseip" event script started by
  ctdb_control_del_public_address(). private_data is the temporary talloc
  context that caller passed in; freeing it here releases everything
  allocated under it. The script status is not inspected in the visible code.
*/
2048 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2051 talloc_free(private_data);
2054 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2056 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2057 struct ctdb_vnn *vnn;
2060 /* verify the size of indata */
2061 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2062 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2066 ( offsetof(struct ctdb_control_ip_iface, iface)
2069 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2070 "but should be %u bytes\n",
2071 (unsigned)indata.dsize,
2072 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2076 /* walk over all public addresses until we find a match */
2077 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2078 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2079 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2081 DLIST_REMOVE(ctdb->vnn, vnn);
2083 ret = ctdb_event_script_callback(ctdb,
2084 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2085 mem_ctx, delete_ip_callback, mem_ctx,
2086 "releaseip %s %s %u",
2088 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2089 vnn->public_netmask_bits);