4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout used for the takeover/release controls sent by ctdb_takeover_run:
   timeval_current_ofs() of the takeover_timeout tunable (presumably "now plus
   that many seconds" - confirm against timeval_current_ofs). The expansion
   references a variable named ctdb, so one must be in scope at the use site. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument given to timeval_current_ofs() when ctdb_control_send_arp
   schedules its next run. */
33 #define CTDB_ARP_INTERVAL 1
/* Value that arp->count is compared with in ctdb_control_send_arp. */
34 #define CTDB_ARP_REPEAT 3
/*
  State handed as private_data to the ctdb_control_send_arp timed event.
  NOTE(review): ctdb_control_send_arp and takeover_ip_callback also use
  members named addr, vnn and count which are not shown in this excerpt.
 */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
/* connections to send tcp tickle acks for; taken over from vnn->tcp_array
   with talloc_steal in takeover_ip_callback */
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* One tcp connection registered by a client; linked through prev/next and
   kept on client->tcp_list (see ctdb_control_tcp_client). */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/*
  Entry on ctdb->client_ip_list, linked through prev/next; the back pointer
  to the context is used by ctdb_client_ip_destructor to unlink the entry.
  NOTE(review): other code in this file also uses members named addr and
  client_id which are not shown in this excerpt.
 */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/*
  Timed-event handler that advertises a taken-over public address.

  private_data is expected to be a struct ctdb_takeover_arp (fetched with
  talloc_get_type). Each run sends an ARP for arp->addr on the interface of
  arp->vnn using ctdb_sys_send_arp(), then walks arp->tcparray and sends a
  tcp tickle ack for every recorded connection using ctdb_sys_send_tcp().
  A failure of either send is logged at DEBUG_CRIT. The handler compares
  arp->count with CTDB_ARP_REPEAT and schedules itself again on
  arp->vnn->takeover_ctx, CTDB_ARP_INTERVAL from now.

  The ev, te and t arguments are not used in the lines shown.
  NOTE(review): where arp->count is updated and what is done once the
  repeat limit is hit is not visible here - presumably the arp state is
  freed and no further event is added; confirm.
 */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
/* announce the address on the interface of its vnn */
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
/* tickle every tcp connection recorded for this address */
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
/* re-arm on the takeover context of the vnn, so freeing that context (as
   ctdb_control_release_ip does) also drops the pending event - assuming
   the usual talloc parent/child semantics */
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
110 ctdb_control_send_arp, arp);
/* State passed to takeover_ip_callback / release_ip_callback while the
   takeip / releaseip event script runs. */
113 struct takeover_callback_state {
/* the pending control request that is answered from the callback */
114 struct ctdb_req_control *c;
/* private copy of the public address being taken over or released */
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/*
  Completion callback for the event script started by
  ctdb_control_takeover_ip. private_data is the takeover_callback_state.

  When the takeover failed, the failure is logged and the pending control
  is answered with the script status. Otherwise:
   - vnn->takeover_ctx is created on demand as a child of ctdb,
   - a zeroed ctdb_takeover_arp is allocated under that context and filled
     with the address and the vnn,
   - the tcp_array of the vnn is moved (talloc_steal) into the arp state,
     the vnn pointer is cleared and tcp_update_needed is set,
   - ctdb_control_send_arp is scheduled with timeval_zero(),
   - the control is answered with status 0.
  An allocation failure of the arp state jumps to the failed label, where
  the control is answered with -1.

  NOTE(review): the condition guarding the failure branch and the
  assignment of arp->ctdb are not visible in this excerpt.
 */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
/* lazily create the per-vnn context that owns all arp state and events */
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(ctdb);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
/* first arp/tickle round is scheduled with a zero timeval */
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn entry whose public ip address matches addr
179 returns NULL if the address is not known as a public address
/*
  Walk the ctdb->vnn list and compare each public_address with addr using
  ctdb_same_ip().
  NOTE(review): the return statements are not visible here; the function
  returns a pointer, so the "not found" result is presumably NULL rather
  than -1 - confirm.
 */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
/*
  Control handler: take over a public ip address on this node.

  indata.dptr is interpreted as a struct ctdb_public_ip. The matching vnn
  is looked up; an address that is not a public address is logged. When
  ctdb_sys_have_ip() reports that the address is already configured,
  nothing more is done. Otherwise a takeover_callback_state holding the
  request and a copy of the address is allocated and an event script is
  started through ctdb_event_script_callback() with the script_timeout
  tunable; takeover_ip_callback answers the control when it finishes.
  A failure to start the script is logged.

  NOTE(review): state, the stolen request c and state->addr are all
  parented to ctdb here, whereas ctdb_control_release_ip parents c and
  addr to state. No talloc_free of them is visible in this excerpt -
  confirm they are released somewhere.
  NOTE(review): no check of indata.dsize against
  sizeof(struct ctdb_public_ip) is visible before pip is dereferenced.
 */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(ctdb, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239 state, takeover_ip_callback, state,
242 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243 vnn->public_netmask_bits);
246 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247 ctdb_addr_to_str(&pip->addr),
253 /* tell ctdb_control.c that we will be replying asynchronously */
260 takeover an ip address old v4 style
/*
  Compatibility wrapper for the old ipv4-only takeover control: the payload
  is copied into a zeroed struct ctdb_public_ip allocated under c and then
  handed to ctdb_control_takeover_ip().

  NOTE(review): the copy length is indata.dsize while the destination is
  sizeof(struct ctdb_public_ip) bytes; no bound check is visible here, so a
  larger payload would overrun the buffer - confirm the size is validated
  by the caller.
  NOTE(review): this relies on the old ipv4 structure being a layout prefix
  of struct ctdb_public_ip - confirm.
 */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
263 struct ctdb_req_control *c,
269 data.dsize = sizeof(struct ctdb_public_ip);
270 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271 CTDB_NO_MEMORY(ctdb, data.dptr);
273 memcpy(data.dptr, indata.dptr, indata.dsize);
274 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
278 kill any clients that are registered with an IP that is being released
/*
  Walk ctdb->client_ip_list and, for every entry whose address matches addr
  (ctdb_same_ip), look up the owning client with ctdb_reqid_find() and send
  SIGKILL to its pid when that pid is non-zero.

  NOTE(review): the assignment of tmp_addr is not visible here - presumably
  a copy of ip->addr; confirm.
  NOTE(review): client is dereferenced without a visible NULL check on the
  result of ctdb_reqid_find().
 */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 struct ctdb_client_ip *ip;
284 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285 ctdb_addr_to_str(addr)));
287 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288 ctdb_sock_addr tmp_addr;
291 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
293 ctdb_addr_to_str(&ip->addr)));
295 if (ctdb_same_ip(&tmp_addr, addr)) {
296 struct ctdb_client *client = ctdb_reqid_find(ctdb,
299 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
301 ctdb_addr_to_str(&ip->addr),
/* a pid of 0 is skipped: kill(0, sig) would signal the whole process group */
304 if (client->pid != 0) {
305 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306 (unsigned)client->pid,
307 ctdb_addr_to_str(addr),
309 kill(client->pid, SIGKILL);
316 called when releaseip event finishes
/*
  Completion callback for the releaseip event script started by
  ctdb_control_release_ip. private_data is the takeover_callback_state.

  Sends the textual address (including its terminating NUL) to this node
  as a CTDB_SRVID_RELEASE_IP message, kills the clients registered for the
  address with release_kill_clients() and answers the pending control
  with status 0.

  NOTE(review): status is not consulted in the lines shown, so the control
  appears to be answered with 0 even if the script failed - confirm.
  NOTE(review): the talloc_strdup result is passed to strlen without a
  visible NULL check.
 */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status,
321 struct takeover_callback_state *state =
322 talloc_get_type(private_data, struct takeover_callback_state);
325 /* send a message to all clients of this node telling them
326 that the cluster has been reconfigured and they should
327 release any sockets on this IP */
328 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329 data.dsize = strlen((char *)data.dptr)+1;
331 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
333 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
335 /* kill clients that have registered with this IP */
336 release_kill_clients(ctdb, state->addr);
338 /* the control succeeded */
339 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
344 release an ip address
/*
  Control handler: release a public ip address held by this node.

  indata.dptr is interpreted as a struct ctdb_public_ip. After looking up
  the vnn, vnn->takeover_ctx is freed and cleared, which stops any arp
  events still pending for the address. When ctdb_sys_have_ip() reports
  that the address is not configured the release is logged as redundant.
  Otherwise a takeover_callback_state owning the request and a copy of the
  address is allocated and the "releaseip" event script is started through
  ctdb_event_script_callback() with the script_timeout tunable;
  release_ip_callback answers the control when it finishes. A failure to
  start the script is logged.

  NOTE(review): the message logged for an unknown address says
  "takeoverip" although this is the release path - looks like a copy/paste
  from ctdb_control_takeover_ip; confirm and correct the text.
  NOTE(review): no check of indata.dsize against
  sizeof(struct ctdb_public_ip) is visible before pip is dereferenced.
 */
346 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
347 struct ctdb_req_control *c,
352 struct takeover_callback_state *state;
353 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
354 struct ctdb_vnn *vnn;
356 /* update our vnn list */
357 vnn = find_public_ip_vnn(ctdb, &pip->addr);
359 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
360 ctdb_addr_to_str(&pip->addr)));
365 /* stop any previous arps */
366 talloc_free(vnn->takeover_ctx);
367 vnn->takeover_ctx = NULL;
369 if (!ctdb_sys_have_ip(&pip->addr)) {
370 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
371 ctdb_addr_to_str(&pip->addr),
372 vnn->public_netmask_bits,
377 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n",
378 ctdb_addr_to_str(&pip->addr),
379 vnn->public_netmask_bits,
382 state = talloc(ctdb, struct takeover_callback_state);
383 CTDB_NO_MEMORY(ctdb, state);
/* the request and the address copy become children of state */
385 state->c = talloc_steal(state, c);
386 state->addr = talloc(state, ctdb_sock_addr);
387 CTDB_NO_MEMORY(ctdb, state->addr);
388 *state->addr = pip->addr;
391 ret = ctdb_event_script_callback(ctdb,
392 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
393 state, release_ip_callback, state,
394 "releaseip %s %s %u",
396 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
397 vnn->public_netmask_bits);
399 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
400 ctdb_addr_to_str(&pip->addr),
406 /* tell the control that we will be reply asynchronously */
412 release an ip address old v4 style
/*
  Compatibility wrapper for the old ipv4-only release control: the payload
  is copied into a zeroed struct ctdb_public_ip allocated under c and then
  handed to ctdb_control_release_ip().

  NOTE(review): the copy length is indata.dsize while the destination is
  sizeof(struct ctdb_public_ip) bytes; no bound check is visible here, so a
  larger payload would overrun the buffer - confirm the size is validated
  by the caller.
 */
414 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
415 struct ctdb_req_control *c,
421 data.dsize = sizeof(struct ctdb_public_ip);
422 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
423 CTDB_NO_MEMORY(ctdb, data.dptr);
425 memcpy(data.dptr, indata.dptr, indata.dsize);
426 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Register one public address with this daemon.

  An address already present on ctdb->vnn (compared with
  ctdb_same_sockaddr) is reported at DEBUG_CRIT as a duplicate. Otherwise
  a zeroed ctdb_vnn is allocated as a child of ctdb, given a private copy
  of the interface name, the address and the netmask bits, and added to
  the ctdb->vnn list.

  NOTE(review): the return values are not visible here; the caller treats
  a non-zero result as failure.
  NOTE(review): the talloc_strdup result stored in vnn->iface has no
  visible NULL check.
 */
430 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
432 struct ctdb_vnn *vnn;
434 /* Verify that we dont have an entry for this ip yet */
435 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
436 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
437 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
438 ctdb_addr_to_str(addr)));
443 /* create a new vnn structure for this ip address */
444 vnn = talloc_zero(ctdb, struct ctdb_vnn);
445 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
446 vnn->iface = talloc_strdup(vnn, iface);
447 vnn->public_address = *addr;
448 vnn->public_netmask_bits = mask;
451 DLIST_ADD(ctdb->vnn, vnn);
458 setup the event script directory
/*
  Store a private copy (talloc_strdup, child of ctdb) of script_dir in
  ctdb->event_script_dir. CTDB_NO_MEMORY handles a failed allocation.
  NOTE(review): a previously stored directory string is not freed in the
  lines shown.
 */
460 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
462 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
463 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
468 setup the public address lists from a file
/*
  Load the public address list from the file named by alist.

  The file is read with file_lines_load(); a load failure is recorded with
  ctdb_set_error(). Trailing empty lines are ignored. For each remaining
  line, leading spaces and tabs are skipped and empty lines are passed
  over. The line is split on spaces/tabs with strtok(): the first token is
  the address and the second, when present, the interface. Without an
  interface token ctdb->default_public_interface is used, and its absence
  is reported at DEBUG_CRIT. The address is parsed with parse_ip_mask()
  and registered with ctdb_add_public_address(); a failure of either is
  reported at DEBUG_CRIT with the 1-based line number.

  strtok() modifies the line buffer in place and keeps hidden static
  state, so this function is not reentrant.
 */
470 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
476 lines = file_lines_load(alist, &nlines, ctdb);
478 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
481 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
485 for (i=0;i<nlines;i++) {
493 while ((*line == ' ') || (*line == '\t')) {
499 if (strcmp(line, "") == 0) {
502 tok = strtok(line, " \t");
504 tok = strtok(NULL, " \t");
506 if (NULL == ctdb->default_public_interface) {
507 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
512 iface = ctdb->default_public_interface;
517 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
518 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
522 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
523 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
  Singly linked list of public addresses used while computing the ip
  allocation in ctdb_takeover_run.
  NOTE(review): the code also uses members named pnn and addr which are
  not shown in this excerpt.
 */
536 struct ctdb_public_ip_list {
537 struct ctdb_public_ip_list *next;
543 /* Given a physical node, return the number of
544 public addresses that are currently assigned to this node.
/* Walk the ips list and test each entry for ips->pnn == pnn; per the
   description of this function the matches are counted and the count is
   returned (the counter and the return are not visible in this excerpt).
   ctdb is not used in the lines shown. */
546 static int node_ip_coverage(struct ctdb_context *ctdb,
548 struct ctdb_public_ip_list *ips)
552 for (;ips;ips=ips->next) {
553 if (ips->pnn == pnn) {
561 /* Check if this is a public ip known to the node, i.e. can that
562 node takeover this ip ?
/*
  Check whether the address in ip appears in the public ip list recorded
  for node pnn (ctdb->nodes[pnn]->public_ips). A node without a recorded
  list is handled separately before the search.

  NOTE(review): the return values are not visible here. Callers treat a
  non-zero result as "cannot serve", so presumably 0 means the node can
  serve the address and non-zero means it cannot - confirm.
  NOTE(review): pnn is used as an index into ctdb->nodes without a visible
  range check.
 */
564 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
565 struct ctdb_public_ip_list *ip)
567 struct ctdb_all_public_ips *public_ips;
570 public_ips = ctdb->nodes[pnn]->public_ips;
572 if (public_ips == NULL) {
576 for (i=0;i<public_ips->num;i++) {
577 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
578 /* yes, this node can serve this public ip */
587 /* search the node list for a node to take over this ip.
588 pick the node that is currently serving the least number of ips
589 so that the ips get spread out evenly.
/*
  Choose a node to host the address in ip.

  Every node in nodemap is considered except those with a flag in mask set
  and those for which can_node_serve_ip() is non-zero. For the remaining
  candidates node_ip_coverage() gives the number of addresses currently
  assigned to them in all_ips. When no node qualifies a DEBUG_WARNING is
  logged.

  NOTE(review): the comparison that picks the candidate, the update of
  ip->pnn and the return values are not visible in this excerpt; the
  caller treats a non-zero result as failure.
 */
591 static int find_takeover_node(struct ctdb_context *ctdb,
592 struct ctdb_node_map *nodemap, uint32_t mask,
593 struct ctdb_public_ip_list *ip,
594 struct ctdb_public_ip_list *all_ips)
600 for (i=0;i<nodemap->num;i++) {
601 if (nodemap->nodes[i].flags & mask) {
602 /* This node is not healty and can not be used to serve
608 /* verify that this node can serve this ip */
609 if (can_node_serve_ip(ctdb, i, ip)) {
610 /* no it couldnt so skip to the next node */
614 num = node_ip_coverage(ctdb, i, all_ips);
615 /* was this the first node we checked ? */
627 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
628 ctdb_addr_to_str(&ip->addr)));
/*
  Add ip to the merged list ip_list unless an entry with the same address
  (ctdb_same_ip) is already present. A new entry is allocated zeroed under
  tmp_ctx, receives the pnn and address of ip and is linked in front of
  the existing list. CTDB_NO_MEMORY_NULL handles a failed allocation.

  NOTE(review): the return statements are not visible here; the caller
  stores the result as the new list head, so presumably the unchanged
  list is returned for a duplicate and the new entry otherwise - confirm.
  NOTE(review): not declared static although no external user is visible
  in this excerpt.
 */
637 struct ctdb_public_ip_list *
638 add_ip_to_merged_list(struct ctdb_context *ctdb,
640 struct ctdb_public_ip_list *ip_list,
641 struct ctdb_public_ip *ip)
643 struct ctdb_public_ip_list *tmp_ip;
645 /* do we already have this ip in our merged list ?*/
646 for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
648 /* we already have this public ip in the list */
649 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
654 /* this is a new public ip, we must add it to the list */
655 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
656 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
657 tmp_ip->pnn = ip->pnn;
658 tmp_ip->addr = ip->addr;
659 tmp_ip->next = ip_list;
/*
  Build one list containing every public address known to any node: for
  each of the ctdb->num_nodes nodes that has a public_ips list, every
  entry is passed through add_ip_to_merged_list(). Entries are allocated
  under tmp_ctx.

  NOTE(review): the result of add_ip_to_merged_list() replaces ip_list
  directly; if that function returns NULL on allocation failure the list
  built so far is no longer reachable through ip_list (it stays owned by
  tmp_ctx) - confirm this is intended.
 */
664 struct ctdb_public_ip_list *
665 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
668 struct ctdb_public_ip_list *ip_list = NULL;
669 struct ctdb_all_public_ips *public_ips;
671 for (i=0;i<ctdb->num_nodes;i++) {
672 public_ips = ctdb->nodes[i]->public_ips;
674 /* there were no public ips for this node */
675 if (public_ips == NULL) {
679 for (j=0;j<public_ips->num;j++) {
680 ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
681 ip_list, &public_ips->ips[j]);
689 make any IP alias changes for public addresses that are necessary
/*
  Recompute which node should host every public address and push the
  result to the cluster. All temporary data hangs off tmp_ctx, a child of
  ctdb, which is freed on the exit paths shown.

  Steps visible in this function:
   1. Count the nodes that have neither NODE_FLAGS_INACTIVE nor
      NODE_FLAGS_DISABLED set. If there is at least one, nodes with either
      flag are excluded (mask = both flags); otherwise only inactive nodes
      are excluded, so disabled nodes act as a fallback.
   2. Build the merged list of all public addresses with
      create_merged_ip_list().
   3. If the deterministic_public_ips tunable is 1, reset every assignment
      to (list index % nodemap->num).
   4. Inspect every assigned address: once against the node flags and
      mask, once with can_node_serve_ip(). Per the comments such addresses
      are set to -1 (unassigned).
   5. Call find_takeover_node() for every address whose pnn is -1.
   6. If no_ip_failback is 1, report an error when deterministic ips are
      enabled as well. Per the comments, rebalancing is then skipped.
   7. Rebalance: for each assigned address determine the highest and
      lowest coverage among the usable nodes that can serve it; when
      maxnum > minnum+1 one address of the node with the highest coverage
      is looked up to be handed over. Skipped for deterministic ips.
   8. Release phase: for every node that is not NODE_FLAGS_DISCONNECTED
      and every address not assigned to it, send CTDB_CONTROL_RELEASE_IPv4
      (AF_INET addresses) or CTDB_CONTROL_RELEASE_IP with
      TAKEOVER_TIMEOUT(), then wait for all replies with
      ctdb_client_async_wait().
   9. Takeover phase: for every assigned address send
      CTDB_CONTROL_TAKEOVER_IPv4 or CTDB_CONTROL_TAKEOVER_IP to the
      owning node and wait for all replies.

  NOTE(review): the return statements, the retry counter handling and the
  bodies of several branches are not visible in this excerpt.
  NOTE(review): step 3 assigns by index modulo nodemap->num without regard
  to node health; steps 4 and 5 appear to correct that afterwards.
  NOTE(review): the result of talloc_new() for tmp_ctx has no visible NULL
  check.
 */
691 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
693 int i, num_healthy, retries;
694 struct ctdb_public_ip ip;
695 struct ctdb_public_ipv4 ipv4;
697 struct ctdb_public_ip_list *all_ips, *tmp_ip;
698 int maxnode, maxnum=0, minnode, minnum=0, num;
700 struct timeval timeout;
701 struct client_async_data *async_data;
702 struct ctdb_client_control_state *state;
703 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
708 /* Count how many completely healthy nodes we have */
710 for (i=0;i<nodemap->num;i++) {
711 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
716 if (num_healthy > 0) {
717 /* We have healthy nodes, so only consider them for
718 serving public addresses
720 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
722 /* We didnt have any completely healthy nodes so
723 use "disabled" nodes as a fallback
725 mask = NODE_FLAGS_INACTIVE;
728 /* since nodes only know about those public addresses that
729 can be served by that particular node, no single node has
730 a full list of all public addresses that exist in the cluster.
731 Walk over all node structures and create a merged list of
732 all public addresses that exist in the cluster.
734 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
736 /* If we want deterministic ip allocations, i.e. that the ip addresses
737 will always be allocated the same way for a specific set of
738 available/unavailable nodes.
740 if (1 == ctdb->tunable.deterministic_public_ips) {
741 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
742 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
743 tmp_ip->pnn = i%nodemap->num;
748 /* mark all public addresses with a masked node as being served by
751 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
752 if (tmp_ip->pnn == -1) {
755 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
760 /* verify that the assigned nodes can serve that public ip
761 and set it to -1 if not
763 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
764 if (tmp_ip->pnn == -1) {
767 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
768 /* this node can not serve this ip. */
774 /* now we must redistribute all public addresses with takeover node
775 -1 among the nodes available
779 /* loop over all ip's and find a physical node to cover for
782 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
783 if (tmp_ip->pnn == -1) {
784 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
785 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
786 ctdb_addr_to_str(&tmp_ip->addr)));
791 /* If we dont want ips to fail back after a node becomes healthy
792 again, we wont even try to reallocat the ip addresses so that
793 they are evenly spread out.
794 This can NOT be used at the same time as DeterministicIPs !
796 if (1 == ctdb->tunable.no_ip_failback) {
797 if (1 == ctdb->tunable.deterministic_public_ips) {
798 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
804 /* now, try to make sure the ip adresses are evenly distributed
806 for each ip address, loop over all nodes that can serve this
807 ip and make sure that the difference between the node
808 serving the most and the node serving the least ip's are not greater
811 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
812 if (tmp_ip->pnn == -1) {
816 /* Get the highest and lowest number of ips's served by any
817 valid node which can serve this ip.
821 for (i=0;i<nodemap->num;i++) {
822 if (nodemap->nodes[i].flags & mask) {
826 /* only check nodes that can actually serve this ip */
827 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
828 /* no it couldnt so skip to the next node */
832 num = node_ip_coverage(ctdb, i, all_ips);
853 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
854 ctdb_addr_to_str(&tmp_ip->addr)));
859 /* If we want deterministic IPs then dont try to reallocate
860 them to spread out the load.
862 if (1 == ctdb->tunable.deterministic_public_ips) {
866 /* if the spread between the smallest and largest coverage by
867 a node is >=2 we steal one of the ips from the node with
868 most coverage to even things out a bit.
869 try to do this at most 5 times since we dont want to spend
870 too much time balancing the ip coverage.
872 if ( (maxnum > minnum+1)
874 struct ctdb_public_ip_list *tmp;
876 /* mark one of maxnode's vnn's as unassigned and try
879 for (tmp=all_ips;tmp;tmp=tmp->next) {
880 if (tmp->pnn == maxnode) {
890 /* finished distributing the public addresses, now just send the
891 info out to the nodes
895 /* at this point ->pnn is the node which will own each IP
896 or -1 if there is no node that can cover this ip
899 /* now tell all nodes to delete any alias that they should not
900 have. This will be a NOOP on nodes that don't currently
901 hold the given alias */
902 async_data = talloc_zero(tmp_ctx, struct client_async_data);
903 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
905 for (i=0;i<nodemap->num;i++) {
906 /* don't talk to unconnected nodes, but do talk to banned nodes */
907 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
911 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
912 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
913 /* This node should be serving this
914 vnn so dont tell it to release the ip
918 if (tmp_ip->addr.sa.sa_family == AF_INET) {
919 ipv4.pnn = tmp_ip->pnn;
920 ipv4.sin = tmp_ip->addr.ip;
922 timeout = TAKEOVER_TIMEOUT();
923 data.dsize = sizeof(ipv4);
924 data.dptr = (uint8_t *)&ipv4;
925 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
926 0, CTDB_CONTROL_RELEASE_IPv4, 0,
930 ip.pnn = tmp_ip->pnn;
931 ip.addr = tmp_ip->addr;
933 timeout = TAKEOVER_TIMEOUT();
934 data.dsize = sizeof(ip);
935 data.dptr = (uint8_t *)&ip;
936 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
937 0, CTDB_CONTROL_RELEASE_IP, 0,
943 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
944 talloc_free(tmp_ctx);
948 ctdb_client_async_add(async_data, state);
951 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
952 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
953 talloc_free(tmp_ctx);
956 talloc_free(async_data);
959 /* tell all nodes to get their own IPs */
960 async_data = talloc_zero(tmp_ctx, struct client_async_data);
961 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
962 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
963 if (tmp_ip->pnn == -1) {
964 /* this IP won't be taken over */
968 if (tmp_ip->addr.sa.sa_family == AF_INET) {
969 ipv4.pnn = tmp_ip->pnn;
970 ipv4.sin = tmp_ip->addr.ip;
972 timeout = TAKEOVER_TIMEOUT();
973 data.dsize = sizeof(ipv4);
974 data.dptr = (uint8_t *)&ipv4;
975 state = ctdb_control_send(ctdb, tmp_ip->pnn,
976 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
980 ip.pnn = tmp_ip->pnn;
981 ip.addr = tmp_ip->addr;
983 timeout = TAKEOVER_TIMEOUT();
984 data.dsize = sizeof(ip);
985 data.dptr = (uint8_t *)&ip;
986 state = ctdb_control_send(ctdb, tmp_ip->pnn,
987 0, CTDB_CONTROL_TAKEOVER_IP, 0,
992 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
993 talloc_free(tmp_ctx);
997 ctdb_client_async_add(async_data, state);
999 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1000 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1001 talloc_free(tmp_ctx);
1005 talloc_free(tmp_ctx);
1011 destroy a ctdb_client_ip structure
/*
  talloc destructor installed by ctdb_control_tcp_client: logs the entry
  at DEBUG_DEBUG and unlinks it from ctdb->client_ip_list.
  NOTE(review): the port is read through addr.ip.sin_port regardless of
  the address family - confirm this is also correct for ipv6 entries.
 */
1013 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1015 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1016 ctdb_addr_to_str(&ip->addr),
1017 ntohs(ip->addr.ip.sin_port),
1020 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1025 called by a client to inform us of a TCP connection that it is managing
1026 that should be tickled with an ACK when IP takeover is done
1027 we handle both the old ipv4 style of packets as well as the new ipv4/6
/*
  Control handler: a local client registers a tcp connection.

  The payload is recognised by its size: struct ctdb_control_tcp (old
  ipv4 layout, converted into a zeroed struct ctdb_control_tcp_addr) or
  struct ctdb_control_tcp_addr (used in place). Any other size is logged
  as invalid. Source and destination are passed through
  ctdb_canonicalize_ip() and the vnn of the destination is looked up.

  A destination that is not a public address is logged according to its
  address family; for AF_INET the message is only logged when the address
  is not INADDR_LOOPBACK. A public address whose vnn->pnn differs from
  ctdb->pnn is logged and the call fails.

  Otherwise a ctdb_client_ip entry (child of the client, with
  ctdb_client_ip_destructor installed) is added to ctdb->client_ip_list,
  a ctdb_tcp_list entry is added to client->tcp_list, and the connection
  is broadcast to all connected nodes with CTDB_CONTROL_TCP_ADD and
  CTDB_CTRL_FLAG_NOREPLY. A failed broadcast is logged.

  NOTE(review): client comes from ctdb_reqid_find() and is dereferenced
  (client->pid, client->tcp_list) without a visible NULL check.
  NOTE(review): the invalid-size message formats sizeof() results with
  %lu; sizeof yields size_t, for which %zu is the matching conversion.
  NOTE(review): the return values are not visible in this excerpt.
 */
1030 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1033 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1034 struct ctdb_control_tcp *old_addr = NULL;
1035 struct ctdb_control_tcp_addr new_addr;
1036 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1037 struct ctdb_tcp_list *tcp;
1038 struct ctdb_control_tcp_vnn t;
1041 struct ctdb_client_ip *ip;
1042 struct ctdb_vnn *vnn;
1043 ctdb_sock_addr addr;
/* the payload layout is selected purely by its size */
1045 switch (indata.dsize) {
1046 case sizeof(struct ctdb_control_tcp):
1047 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1048 ZERO_STRUCT(new_addr);
1049 tcp_sock = &new_addr;
1050 tcp_sock->src.ip = old_addr->src;
1051 tcp_sock->dest.ip = old_addr->dest;
1053 case sizeof(struct ctdb_control_tcp_addr):
1054 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1057 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed to ctdb_control_tcp_client. size was %d but only allowed sizes are %lu and %lu\n", (int)indata.dsize, sizeof(struct ctdb_control_tcp), sizeof(struct ctdb_control_tcp_addr)));
/* canonicalize through a temporary copy so input and output do not alias */
1061 addr = tcp_sock->src;
1062 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1063 addr = tcp_sock->dest;
1064 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1067 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1068 vnn = find_public_ip_vnn(ctdb, &addr);
1070 switch (addr.sa.sa_family) {
1072 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1073 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1074 ctdb_addr_to_str(&addr)));
1078 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1079 ctdb_addr_to_str(&addr)));
1082 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1088 if (vnn->pnn != ctdb->pnn) {
1089 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1090 ctdb_addr_to_str(&addr),
1091 client_id, client->pid));
1092 /* failing this call will tell smbd to die */
/* both records are children of the client, so they go away with it */
1096 ip = talloc(client, struct ctdb_client_ip);
1097 CTDB_NO_MEMORY(ctdb, ip);
1101 ip->client_id = client_id;
1102 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1103 DLIST_ADD(ctdb->client_ip_list, ip);
1105 tcp = talloc(client, struct ctdb_tcp_list);
1106 CTDB_NO_MEMORY(ctdb, tcp);
1108 tcp->connection.src_addr = tcp_sock->src;
1109 tcp->connection.dst_addr = tcp_sock->dest;
1111 DLIST_ADD(client->tcp_list, tcp);
1113 t.src = tcp_sock->src;
1114 t.dest = tcp_sock->dest;
1116 data.dptr = (uint8_t *)&t;
1117 data.dsize = sizeof(t);
1119 switch (addr.sa.sa_family) {
1121 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1122 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1123 ctdb_addr_to_str(&tcp_sock->src),
1124 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1127 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1128 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1129 ctdb_addr_to_str(&tcp_sock->src),
1130 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1133 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1137 /* tell all nodes about this tcp connection */
1138 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1139 CTDB_CONTROL_TCP_ADD,
1140 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1142 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1150 find a tcp address on a list
/*
  Linear search of array for a connection whose source and destination
  both match tcp (ctdb_same_sockaddr on each). Returns a pointer to the
  matching element inside the array. A NULL array is handled before the
  search.
  NOTE(review): the "not found" return is not visible here; callers
  compare the result with NULL.
 */
1152 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1153 struct ctdb_tcp_connection *tcp)
1157 if (array == NULL) {
1161 for (i=0;i<array->num;i++) {
1162 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1163 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1164 return &array->connections[i];
1171 called by a daemon to inform us of a TCP connection that one of its
1172 clients is managing and that should be tickled with an ACK when IP takeover is
/*
  Control handler: record a tcp connection (tickle) for the public address
  it is connected to.

  indata.dptr is interpreted as a struct ctdb_control_tcp_vnn. A
  destination that is not a public address is logged at DEBUG_INFO.
  For the first tickle of a vnn a ctdb_tcp_array is allocated under
  ctdb->nodes together with a one-element connections array, and the
  connection is stored. Otherwise a connection already present
  (ctdb_tcp_find) is only logged, and a new one is appended after growing
  the connections array with talloc_realloc.

  NOTE(review): the talloc_realloc result is assigned straight to
  tcparray->connections; if it fails the old pointer is overwritten with
  NULL while tcparray->num still counts the old entries - confirm how
  CTDB_NO_MEMORY leaves the array for later users.
  NOTE(review): the updates of tcparray->num, the new element count given
  to talloc_realloc and the return values are not visible here.
  NOTE(review): no check of indata.dsize is visible before p is
  dereferenced.
 */
1175 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1177 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1178 struct ctdb_tcp_array *tcparray;
1179 struct ctdb_tcp_connection tcp;
1180 struct ctdb_vnn *vnn;
1182 vnn = find_public_ip_vnn(ctdb, &p->dest);
1184 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1185 ctdb_addr_to_str(&p->dest)));
1191 tcparray = vnn->tcp_array;
1193 /* If this is the first tickle */
1194 if (tcparray == NULL) {
1195 tcparray = talloc_size(ctdb->nodes,
1196 offsetof(struct ctdb_tcp_array, connections) +
1197 sizeof(struct ctdb_tcp_connection) * 1);
1198 CTDB_NO_MEMORY(ctdb, tcparray);
1199 vnn->tcp_array = tcparray;
1202 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1203 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1205 tcparray->connections[tcparray->num].src_addr = p->src;
1206 tcparray->connections[tcparray->num].dst_addr = p->dest;
1212 /* Do we already have this tickle ?*/
1213 tcp.src_addr = p->src;
1214 tcp.dst_addr = p->dest;
1215 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1216 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1217 ctdb_addr_to_str(&tcp.dst_addr),
1218 ntohs(tcp.dst_addr.ip.sin_port),
1223 /* A new tickle, we must add it to the array */
1224 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1225 struct ctdb_tcp_connection,
1227 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1229 vnn->tcp_array = tcparray;
1230 tcparray->connections[tcparray->num].src_addr = p->src;
1231 tcparray->connections[tcparray->num].dst_addr = p->dest;
1234 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1235 ctdb_addr_to_str(&tcp.dst_addr),
1236 ntohs(tcp.dst_addr.ip.sin_port),
1244 called to remove a TCP connection from the list of connections that are
1245 tickled for its public address, e.g. when the owning client goes away
/*
  Remove conn from the tickle array of the public address it is connected
  to (the vnn found for conn->dst_addr).

  An unknown public address is logged at DEBUG_ERR; an empty array or an
  unknown connection is logged at DEBUG_INFO. A known connection is
  removed by copying the last array element over it and decrementing
  num, so the order of the remaining entries is not preserved. When num
  reaches 0 the whole array is freed and vnn->tcp_array is cleared.
  tcp_update_needed is set on the vnn and the removal is logged.

  NOTE(review): the conditions guarding the error branches and their
  returns are not visible in this excerpt.
 */
1248 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1250 struct ctdb_tcp_connection *tcpp;
1251 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1254 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1255 ctdb_addr_to_str(&conn->dst_addr)));
1259 /* if the array is empty we cant remove it
1260 and we dont need to do anything
1262 if (vnn->tcp_array == NULL) {
1263 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1264 ctdb_addr_to_str(&conn->dst_addr),
1265 ntohs(conn->dst_addr.ip.sin_port)));
1270 /* See if we know this connection
1271 if we dont know this connection then we dont need to do anything
1273 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1275 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1276 ctdb_addr_to_str(&conn->dst_addr),
1277 ntohs(conn->dst_addr.ip.sin_port)));
1282 /* We need to remove this entry from the array.
1283 Instead of allocating a new array and copying data to it
1284 we cheat and just copy the last entry in the existing array
1285 to the entry that is to be removed and just shring the
1288 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1289 vnn->tcp_array->num--;
1291 /* If we deleted the last entry we also need to remove the entire array
1293 if (vnn->tcp_array->num == 0) {
1294 talloc_free(vnn->tcp_array);
1295 vnn->tcp_array = NULL;
1298 vnn->tcp_update_needed = true;
1300 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1301 ctdb_addr_to_str(&conn->src_addr),
1302 ntohs(conn->src_addr.ip.sin_port)));
1307 called when a daemon restarts - send all tickles for all public addresses
1308 we are serving immediately to the new node.
/* Control handler invoked for the node vnn; per the XXX note below, the
   sending of tickles to that node is not implemented in the lines shown. */
1310 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1312 /*XXX here we should send all tickles we are serving to the new node */
1318 called when a client structure goes away - hook to remove
1319 elements from the tcp_list in all daemons
/*
  Drain client->tcp_list: each entry is unlinked from the list and its
  connection is removed from the tickle array of its public address with
  ctdb_remove_tcp_connection().
  NOTE(review): the list entries are not freed in the lines shown; they
  are allocated as children of the client in ctdb_control_tcp_client, so
  presumably they are released together with the client - confirm.
 */
1321 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1323 while (client->tcp_list) {
1324 struct ctdb_tcp_list *tcp = client->tcp_list;
1325 DLIST_REMOVE(client->tcp_list, tcp);
1326 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1332 release all IPs on shutdown
/*
  For every public address on ctdb->vnn that ctdb_sys_have_ip() reports as
  configured, run the "releaseip" event script synchronously with
  ctdb_event_script() and kill the clients registered for the address.

  NOTE(review): what is done when vnn->pnn equals ctdb->pnn is not visible
  in this excerpt.
  NOTE(review): the address string is talloc_strdup()ed onto ctdb and not
  freed in the lines shown; since this runs at shutdown that is presumably
  accepted - confirm.
 */
1334 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1336 struct ctdb_vnn *vnn;
1338 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1339 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1342 if (vnn->pnn == ctdb->pnn) {
1345 ctdb_event_script(ctdb, "releaseip %s %s %u",
1347 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1348 vnn->public_netmask_bits);
1349 release_kill_clients(ctdb, &vnn->public_address);
1355 get list of public IPs
/*
  Control handler: return the list of public addresses of this node.

  The ctdb->vnn list is walked once to size the reply and once to fill it.
  The reply is a zeroed struct ctdb_all_public_ips with one
  struct ctdb_public_ip (pnn and address) per vnn, allocated as a child of
  outdata and returned through outdata->dptr / outdata->dsize.
  The request c is not used in the lines shown.

  NOTE(review): the counting statements, the assignment of the element
  count in the reply and the return value are not visible here.
 */
1357 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1358 struct ctdb_req_control *c, TDB_DATA *outdata)
1361 struct ctdb_all_public_ips *ips;
1362 struct ctdb_vnn *vnn;
1364 /* count how many public ip structures we have */
1366 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1370 len = offsetof(struct ctdb_all_public_ips, ips) +
1371 num*sizeof(struct ctdb_public_ip);
1372 ips = talloc_zero_size(outdata, len);
1373 CTDB_NO_MEMORY(ctdb, ips);
1375 outdata->dsize = len;
1376 outdata->dptr = (uint8_t *)ips;
1380 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1381 ips->ips[i].pnn = vnn->pnn;
1382 ips->ips[i].addr = vnn->public_address;
1391 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Build a struct ctdb_all_public_ipsv4 reply in *outdata.

  Same two-pass shape as ctdb_control_get_public_ips(): count, allocate
  a zeroed buffer under outdata, then fill. Both passes test each vnn
  for sa_family != AF_INET; each entry written stores vnn->pnn and the
  sockaddr_in view (public_address.ip) of the address.

  NOTE(review): the bodies of the AF_INET tests are not visible in this
  listing - presumably they skip the non-IPv4 entry; confirm.
 */
1393 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1394 struct ctdb_req_control *c, TDB_DATA *outdata)
1397 struct ctdb_all_public_ipsv4 *ips;
1398 struct ctdb_vnn *vnn;
1400 /* count how many public ip structures we have */
1402 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1403 if (vnn->public_address.sa.sa_family != AF_INET) {
/* header up to the ips[] member plus one ipv4 element per counted address */
1409 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1410 num*sizeof(struct ctdb_public_ipv4);
1411 ips = talloc_zero_size(outdata, len);
1412 CTDB_NO_MEMORY(ctdb, ips);
1414 outdata->dsize = len;
1415 outdata->dptr = (uint8_t *)ips;
/* second pass: same family test as the counting pass */
1419 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1420 if (vnn->public_address.sa.sa_family != AF_INET) {
1423 ips->ips[i].pnn = vnn->pnn;
1424 ips->ips[i].sin = vnn->public_address.ip;
1433 structure containing the listening socket and the list of tcp connections
1434 that the ctdb daemon is to kill
/*
  Per-vnn state for killing tcp connections. One instance is created by
  ctdb_killtcp_add_connection() the first time a connection is queued
  for a vnn.
  NOTE(review): code elsewhere in this file also uses capture_fd and
  private_data members, which are not visible in this listing.
 */
1436 struct ctdb_kill_tcp {
/* owning vnn; the destructor resets vnn->killtcp to NULL */
1437 struct ctdb_vnn *vnn;
/* daemon context, used for its event context (ctdb->ev) */
1438 struct ctdb_context *ctdb;
/* fd event for the capture socket, handled by capture_tcp_handler() */
1440 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
1441 trbt_tree_t *connections;
1446 a tcp connection that is to be killed
/*
  One queued connection, stored in ctdb_kill_tcp.connections.
  NOTE(review): tickle_connection_traverse() also reads a count member,
  which is not visible in this listing.
 */
1448 struct ctdb_killtcp_con {
/* the two endpoints, stored after ctdb_canonicalize_ip() */
1449 ctdb_sock_addr src_addr;
1450 ctdb_sock_addr dst_addr;
/* back-pointer to the owning killtcp state */
1452 struct ctdb_kill_tcp *killtcp;
1455 /* this function is used to create a key to represent this socketpair
1456 in the killtcp tree.
1457 this key is used to insert and lookup matching socketpairs that are
1458 to be tickled and RST
/*
  Build the KILLTCP_KEYLEN-word tree key for an address pair.

  The key lives in a function-local static array that is zeroed on every
  call, so each call overwrites the previous key. The two address
  families must match, otherwise an error is logged.

  sockaddr_in pairs fill words 0-3 (dst addr, src addr, dst port, src
  port) and leave the rest zero. sockaddr_in6 pairs fill all ten words,
  interleaving the dst/src address words from s6_addr32[3] down to [0],
  followed by the two ports.

  Callers in this file pass the arguments in opposite orders:
  capture_tcp_handler() passes (src, dst) of the captured packet, while
  ctdb_killtcp_add_connection() passes (dst_addr, src_addr) of the
  stored connection - presumably because a captured reply travels in the
  opposite direction; confirm.

  NOTE(review): the case labels and return statements are not visible in
  this listing.
 */
1460 #define KILLTCP_KEYLEN 10
1461 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1463 static uint32_t key[KILLTCP_KEYLEN];
/* unused words stay zero so shorter keys compare consistently */
1465 bzero(key, sizeof(key));
1467 if (src->sa.sa_family != dst->sa.sa_family) {
1468 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1472 switch (src->sa.sa_family) {
/* sockaddr_in view: addresses and ports, already in network byte order */
1474 key[0] = dst->ip.sin_addr.s_addr;
1475 key[1] = src->ip.sin_addr.s_addr;
1476 key[2] = dst->ip.sin_port;
1477 key[3] = src->ip.sin_port;
/* sockaddr_in6 view: four address words per side, then the ports */
1480 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1481 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1482 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1483 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1484 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1485 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1486 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1487 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1488 key[8] = dst->ip6.sin6_port;
1489 key[9] = src->ip6.sin6_port;
1492 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1500 called when we get a read event on the raw socket
/*
  fd-event callback for the capture socket (registered with
  EVENT_FD_READ by ctdb_killtcp_add_connection()).

  private_data is the struct ctdb_kill_tcp. The handler tests for
  EVENT_FD_READ, reads one packet with ctdb_sys_read_tcp_packet()
  (which also yields ack_seq and seq), and looks the packet's src/dst
  pair up in killtcp->connections. On a match it logs and calls
  ctdb_sys_send_tcp() from the connection's dst_addr to its src_addr
  with the captured ack_seq/seq and a final argument of 1 - per the
  log message this is the tcp reset; TODO confirm the meaning of the
  last argument against ctdb_sys_send_tcp().

  NOTE(review): early returns and the removal of the matched entry are
  not visible in this listing.
 */
1502 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1503 uint16_t flags, void *private_data)
1505 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1506 struct ctdb_killtcp_con *con;
1507 ctdb_sock_addr src, dst;
1508 uint32_t ack_seq, seq;
1510 if (!(flags & EVENT_FD_READ)) {
1514 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1515 killtcp->private_data,
1517 &ack_seq, &seq) != 0) {
1518 /* probably a non-tcp ACK packet */
1522 /* check if we have this guy in our list of connections
1525 con = trbt_lookuparray32(killtcp->connections,
1526 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1528 /* no this was some other packet we can just ignore */
1532 /* This one has been tickled !
1533 now reset him and remove him from the list.
1535 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1536 ntohs(con->dst_addr.ip.sin_port),
1537 ctdb_addr_to_str(&con->src_addr),
1538 ntohs(con->src_addr.ip.sin_port)));
1540 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1545 /* when traversing the list of all tcp connections to send tickle acks to
1546 (so that we can capture the ack coming back and kill the connection
1548 this callback is called for each connection we are currently trying to kill
/*
  Traverse callback invoked for each struct ctdb_killtcp_con in the
  connections tree (param is unused by the visible lines; the caller
  passes NULL).

  A connection whose count has reached 5 is given up on; otherwise it
  is tickled again using dst_addr and src_addr, in that order.

  NOTE(review): the give-up action, the count update and the head of
  the tickle call are not visible in this listing.
 */
1550 static void tickle_connection_traverse(void *param, void *data)
1552 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1554 /* have tried too many times, just give up */
1555 if (con->count >= 5) {
1560 /* otherwise, try tickling it again */
1563 (ctdb_sock_addr *)&con->dst_addr,
1564 (ctdb_sock_addr *)&con->src_addr,
1570 called every second until all sentenced connections have been reset
/*
  Timed-event callback; private_data is the struct ctdb_kill_tcp.

  Runs tickle_connection_traverse() over every queued connection. If
  the tree pointer is NULL or the tree has no root, the whole killtcp
  structure is freed; this also runs ctdb_killtcp_destructor().
  Otherwise the callback re-arms itself one second ahead, with killtcp
  as the talloc parent of the new timed event.
 */
1572 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1573 struct timeval t, void *private_data)
1575 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1578 /* loop over all connections sending tickle ACKs */
1579 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1582 /* If there are no more connections to kill we can remove the
1583 entire killtcp structure
1585 if ( (killtcp->connections == NULL) ||
1586 (killtcp->connections->root == NULL) ) {
1587 talloc_free(killtcp);
1591 /* try tickling them again in a seconds time
1593 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1594 ctdb_tickle_sentenced_connections, killtcp);
1598 destroy the killtcp structure
/*
  talloc destructor installed on a struct ctdb_kill_tcp by
  ctdb_killtcp_add_connection(). It clears the owning vnn's killtcp
  pointer so the vnn does not keep a reference to freed memory.
 */
1600 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1602 killtcp->vnn->killtcp = NULL;
1607 /* nothing fancy here, just unconditionally replace any existing
1608 connection structure with the new one.
1610 don't even free the old one if it did exist, that one is talloc_stolen
1611 by the same node in the tree anyway and will be deleted when the new data
/*
  Insert callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection(), which passes the new connection as
  the callback parameter.
  NOTE(review): the body is not visible in this listing; presumably
  parm is the new entry and data the entry already stored under the
  key, if any - TODO confirm against rb_tree.
 */
1614 static void *add_killtcp_callback(void *parm, void *data)
1620 add a tcp socket to the list of connections we want to RST
/*
  Queue one tcp connection for killing.

  Steps visible in this listing:
   - both addresses are canonicalized into local copies (src, dst);
   - the owning vnn is looked up by dst, then by src, and finally the
     'single ip' vnn is tried when its address equals dst; an error is
     logged when no vnn is found;
   - the vnn's ctdb_kill_tcp is created on first use as a zeroed talloc
     child of ctdb, with capture_fd = -1, a fresh connection tree and
     ctdb_killtcp_destructor() installed;
   - a ctdb_killtcp_con is allocated under killtcp and inserted in the
     tree, keyed by (dst_addr, src_addr);
   - the capture socket is opened on vnn->iface when capture_fd is
     still -1;
   - when no fd event exists yet, the read event (with
     EVENT_FD_AUTOCLOSE) and a one second timer for
     ctdb_tickle_sentenced_connections() are registered, both with
     killtcp as the talloc parent.

  The trailing talloc_free(vnn->killtcp) is presumably the failure
  path; its label and the return statements are not visible here.
  NOTE(review): the s/d parameter lines of the signature are also
  missing from this listing.
 */
1622 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1626 ctdb_sock_addr src, dst;
1627 struct ctdb_kill_tcp *killtcp;
1628 struct ctdb_killtcp_con *con;
1629 struct ctdb_vnn *vnn;
1631 ctdb_canonicalize_ip(s, &src);
1632 ctdb_canonicalize_ip(d, &dst);
1634 vnn = find_public_ip_vnn(ctdb, &dst);
1636 vnn = find_public_ip_vnn(ctdb, &src);
1639 /* if it is not a public ip it could be our 'single ip' */
1640 if (ctdb->single_ip_vnn) {
1641 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1642 vnn = ctdb->single_ip_vnn;
1647 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1651 killtcp = vnn->killtcp;
1653 /* If this is the first connection to kill we must allocate
1656 if (killtcp == NULL) {
1657 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1658 CTDB_NO_MEMORY(ctdb, killtcp);
1661 killtcp->ctdb = ctdb;
1662 killtcp->capture_fd = -1;
1663 killtcp->connections = trbt_create(killtcp, 0);
1665 vnn->killtcp = killtcp;
1666 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1671 /* create a structure that describes this connection we want to
1672 RST and store it in killtcp->connections
1674 con = talloc(killtcp, struct ctdb_killtcp_con);
1675 CTDB_NO_MEMORY(ctdb, con);
1676 con->src_addr = src;
1677 con->dst_addr = dst;
1679 con->killtcp = killtcp;
1682 trbt_insertarray32_callback(killtcp->connections,
1683 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1684 add_killtcp_callback, con);
1687 If we dont have a socket to listen on yet we must create it
1689 if (killtcp->capture_fd == -1) {
1690 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1691 if (killtcp->capture_fd == -1) {
1692 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1698 if (killtcp->fde == NULL) {
1699 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1700 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1701 capture_tcp_handler, killtcp);
1703 /* We also need to set up some events to tickle all these connections
1704 until they are all reset
1706 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1707 ctdb_tickle_sentenced_connections, killtcp);
1710 /* tickle him once now */
1719 talloc_free(vnn->killtcp);
1720 vnn->killtcp = NULL;
1725 kill a TCP connection.
/*
  Control entry point: interprets indata.dptr as a
  struct ctdb_control_killtcp and returns the result of
  ctdb_killtcp_add_connection() for its src_addr/dst_addr pair.
  NOTE(review): no check of indata.dsize is visible before the cast.
 */
1727 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1729 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1731 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1735 called by a daemon to inform us of the entire list of TCP tickles for
1736 a particular public address.
1737 this control should only be sent by the node that is currently serving
1738 that public address.
/*
  Replace the stored tickle list of one public address with the list
  carried in indata (a struct ctdb_control_tcp_tickle_list).

  The blob is validated in two steps: it must reach at least up to
  tickles.connections, and then it must be large enough for
  tickles.num connection entries. The vnn is looked up from
  list->addr; when it is not found a DEBUG_INFO message is logged.
  Any previous vnn->tcp_array is freed, a new ctdb_tcp_array is
  allocated (initially under ctdb->nodes), filled by memcpy from the
  blob and finally re-parented to the vnn with talloc_steal().

  NOTE(review): the return statements are not visible in this listing.
 */
1740 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1742 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1743 struct ctdb_tcp_array *tcparray;
1744 struct ctdb_vnn *vnn;
1746 /* We must at least have tickles.num or else we cant verify the size
1747 of the received data blob
1749 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1750 tickles.connections)) {
1751 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1755 /* verify that the size of data matches what we expect */
1756 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1757 tickles.connections)
1758 + sizeof(struct ctdb_tcp_connection)
1759 * list->tickles.num) {
1760 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1764 vnn = find_public_ip_vnn(ctdb, &list->addr);
1766 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1767 ctdb_addr_to_str(&list->addr)));
1772 /* remove any old ticklelist we might have */
1773 talloc_free(vnn->tcp_array);
1774 vnn->tcp_array = NULL;
1776 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1777 CTDB_NO_MEMORY(ctdb, tcparray);
1779 tcparray->num = list->tickles.num;
1781 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1782 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1784 memcpy(tcparray->connections, &list->tickles.connections[0],
1785 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1787 /* We now have a new fresh tickle list array for this vnn */
1788 vnn->tcp_array = talloc_steal(vnn, tcparray);
1794 called to return the full list of tickles for the public address associated
1795 with the provided vnn
/*
  Return the stored tickle list for the public address in indata.

  indata.dptr is read as a ctdb_sock_addr and used to look up the vnn;
  an error is logged when it is not a public address. The reply is a
  struct ctdb_control_tcp_tickle_list allocated as a talloc child of
  outdata, sized for num connections, with the connections copied from
  vnn->tcp_array.

  NOTE(review): the lines guarding tcparray against NULL (if any), the
  assignment of list->addr and the return statements are not visible
  in this listing.
 */
1797 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1799 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1800 struct ctdb_control_tcp_tickle_list *list;
1801 struct ctdb_tcp_array *tcparray;
1803 struct ctdb_vnn *vnn;
1805 vnn = find_public_ip_vnn(ctdb, addr);
1807 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1808 ctdb_addr_to_str(addr)));
1813 tcparray = vnn->tcp_array;
1815 num = tcparray->num;
/* header up to tickles.connections plus num connection entries */
1820 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1821 tickles.connections)
1822 + sizeof(struct ctdb_tcp_connection) * num;
1824 outdata->dptr = talloc_size(outdata, outdata->dsize);
1825 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1826 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1829 list->tickles.num = num;
1831 memcpy(&list->tickles.connections[0], tcparray->connections,
1832 sizeof(struct ctdb_tcp_connection) * num);
1840 set the list of all tcp tickles for a public address
/*
  Marshal tcparray into a struct ctdb_control_tcp_tickle_list and send
  it as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control with
  CTDB_CTRL_FLAG_NOREPLY. The temporary blob is allocated under ctdb
  and freed after the send; a failed send is logged.

  NOTE(review): in the lines shown the control is sent to
  CTDB_BROADCAST_CONNECTED and neither `destnode` nor `timeout` is
  referenced - confirm whether that is intended. The assignment of
  list->addr and the handling of a NULL tcparray are not visible in
  this listing.
 */
1842 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1843 struct timeval timeout, uint32_t destnode,
1844 ctdb_sock_addr *addr,
1845 struct ctdb_tcp_array *tcparray)
1849 struct ctdb_control_tcp_tickle_list *list;
1852 num = tcparray->num;
/* header up to tickles.connections plus num connection entries */
1857 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1858 tickles.connections) +
1859 sizeof(struct ctdb_tcp_connection) * num;
1860 data.dptr = talloc_size(ctdb, data.dsize);
1861 CTDB_NO_MEMORY(ctdb, data.dptr);
1863 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1865 list->tickles.num = num;
1867 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1870 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1871 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1872 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1874 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1878 talloc_free(data.dptr);
1885 perform tickle updates if required
/*
  Periodic timed-event callback; private_data is the ctdb context.

  For each vnn on ctdb->vnn the loop tests whether the address belongs
  to another node (ctdb->pnn != vnn->pnn) and whether
  tcp_update_needed is unset. For the remaining entries it calls
  ctdb_ctrl_set_tcp_tickles() with CTDB_BROADCAST_CONNECTED and logs a
  failure. Finally it re-arms itself after
  tunable.tickle_update_interval seconds on
  ctdb->tickle_update_context.

  NOTE(review): the bodies of the two tests (presumably `continue`) and
  any reset of tcp_update_needed are not visible in this listing.
 */
1887 static void ctdb_update_tcp_tickles(struct event_context *ev,
1888 struct timed_event *te,
1889 struct timeval t, void *private_data)
1891 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1893 struct ctdb_vnn *vnn;
1895 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1896 /* we only send out updates for public addresses that
1899 if (ctdb->pnn != vnn->pnn) {
1902 /* We only send out the updates if we need to */
1903 if (!vnn->tcp_update_needed) {
1906 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1908 CTDB_BROADCAST_CONNECTED,
1909 &vnn->public_address,
1912 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1913 ctdb_addr_to_str(&vnn->public_address)));
1917 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1918 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1919 ctdb_update_tcp_tickles, ctdb);
1924 start periodic update of tcp tickles
/*
  Create ctdb->tickle_update_context as a new talloc context under ctdb
  and schedule the first ctdb_update_tcp_tickles() run
  tunable.tickle_update_interval seconds from now. The timer is
  parented to that context, so freeing the context cancels it.
 */
1926 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1928 ctdb->tickle_update_context = talloc_new(ctdb);
1930 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1931 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1932 ctdb_update_tcp_tickles, ctdb);
/*
  State for one series of gratuitous ARPs started by
  ctdb_control_send_gratious_arp().
  NOTE(review): send_gratious_arp() also uses iface and count members,
  which are not visible in this listing.
 */
1938 struct control_gratious_arp {
/* daemon context, used for its event context */
1939 struct ctdb_context *ctdb;
/* address passed to ctdb_sys_send_arp() */
1940 ctdb_sock_addr addr;
1946 send a control_gratuitous arp
/*
  Timed-event callback; private_data is a struct control_gratious_arp.

  Sends one ARP for arp->addr on arp->iface and logs a failure with
  strerror(errno). arp->count is compared against CTDB_ARP_REPEAT to
  decide when to stop; otherwise the callback re-arms itself
  CTDB_ARP_INTERVAL seconds ahead, with arp as the talloc parent of the
  timer.

  NOTE(review): the count increment and the action taken when the
  repeat limit is reached are not visible in this listing.
 */
1948 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1949 struct timeval t, void *private_data)
1952 struct control_gratious_arp *arp = talloc_get_type(private_data,
1953 struct control_gratious_arp);
1955 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1957 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1962 if (arp->count == CTDB_ARP_REPEAT) {
1967 event_add_timed(arp->ctdb->ev, arp,
1968 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
1969 send_gratious_arp, arp);
/*
  Control handler that starts a series of gratuitous ARPs.

  indata is read as a struct ctdb_control_gratious_arp. It must reach
  at least up to the iface member, and a second check compares
  indata.dsize with offsetof(iface) + gratious_arp->len. A
  control_gratious_arp is then allocated under ctdb, the address is
  copied, the interface name is duplicated under arp, and
  send_gratious_arp() is scheduled with a zero timeout.

  NOTE(review): the head of the second size check, the remaining field
  initialisation and the return statements are not visible in this
  listing.
 */
1976 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1978 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1979 struct control_gratious_arp *arp;
1981 /* verify the size of indata */
1982 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1983 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
1984 (unsigned)indata.dsize,
1985 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1989 ( offsetof(struct ctdb_control_gratious_arp, iface)
1990 + gratious_arp->len ) ){
1992 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1993 "but should be %u bytes\n",
1994 (unsigned)indata.dsize,
1995 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2000 arp = talloc(ctdb, struct control_gratious_arp);
2001 CTDB_NO_MEMORY(ctdb, arp);
2004 arp->addr = gratious_arp->addr;
/* private copy: the iface string in indata is not owned by this state */
2005 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2006 CTDB_NO_MEMORY(ctdb, arp->iface);
2009 event_add_timed(arp->ctdb->ev, arp,
2010 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler that adds a public address.

  indata is read as a struct ctdb_control_ip_iface. It must reach at
  least up to the iface member, and a second check involves
  offsetof(iface) + pub->len. On success the result of
  ctdb_add_public_address() for the address, mask and interface name
  is returned.

  NOTE(review): the head of the second size check and the error
  returns are not visible in this listing.
 */
2015 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2017 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2020 /* verify the size of indata */
2021 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2022 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2026 ( offsetof(struct ctdb_control_ip_iface, iface)
2029 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2030 "but should be %u bytes\n",
2031 (unsigned)indata.dsize,
2032 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2036 return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2040 called when releaseip event finishes for del_public_address
/*
  Completion callback for the "releaseip" event script started by
  ctdb_control_del_public_address(). private_data is the temporary
  talloc context created there; freeing it releases everything
  allocated under it. status is not used by the visible line.
 */
2042 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2045 talloc_free(private_data);
2048 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2050 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2051 struct ctdb_vnn *vnn;
2054 /* verify the size of indata */
2055 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2056 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2060 ( offsetof(struct ctdb_control_ip_iface, iface)
2063 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2064 "but should be %u bytes\n",
2065 (unsigned)indata.dsize,
2066 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2070 /* walk over all public addresses until we find a match */
2071 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2072 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2073 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2075 DLIST_REMOVE(ctdb->vnn, vnn);
2077 ret = ctdb_event_script_callback(ctdb,
2078 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2079 mem_ctx, delete_ip_callback, mem_ctx,
2080 "releaseip %s %s %u",
2082 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2083 vnn->public_netmask_bits);