4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout for the release/takeover controls, built from the
   takeover_timeout tunable.  Expands against a variable named ctdb in
   the scope where it is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* CTDB_ARP_INTERVAL is the offset (first argument of timeval_current_ofs,
   presumably seconds) after which ctdb_control_send_arp re-arms itself;
   CTDB_ARP_REPEAT is the value arp->count is compared against there. */
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
/*
  State passed as private_data to ctdb_control_send_arp for one address
  that has been taken over.  tcparray holds the connections that get a
  tcp tickle each time the handler runs.
 */
36 struct ctdb_takeover_arp {
37 struct ctdb_context *ctdb;
40 struct ctdb_tcp_array *tcparray;
46 lists of tcp endpoints
/* Doubly linked node (prev/next, used with the DLIST macros) carrying
   one tcp connection; instances hang off client->tcp_list. */
48 struct ctdb_tcp_list {
49 struct ctdb_tcp_list *prev, *next;
50 struct ctdb_tcp_connection connection;
54 list of clients to kill on IP release
/* Doubly linked node kept on ctdb->client_ip_list.  The ctdb back
   pointer is what ctdb_client_ip_destructor uses to unlink the node. */
56 struct ctdb_client_ip {
57 struct ctdb_client_ip *prev, *next;
58 struct ctdb_context *ctdb;
/*
  Timed-event handler that announces a taken-over address.

  private_data is the struct ctdb_takeover_arp describing the address.
  Each run sends an ARP for arp->addr on arp->vnn->iface through
  ctdb_sys_send_arp and then calls ctdb_sys_send_tcp for every
  connection in arp->tcparray; a failure of either call is logged at
  DEBUG_CRIT.  arp->count is compared with CTDB_ARP_REPEAT, and the
  handler re-arms itself on the vnn's takeover_ctx to fire again after
  CTDB_ARP_INTERVAL.
 */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
68 struct timeval t, void *private_data)
70 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
71 struct ctdb_takeover_arp);
73 struct ctdb_tcp_array *tcparray;
75 ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
77 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
80 tcparray = arp->tcparray;
82 for (i=0;i<tcparray->num;i++) {
83 struct ctdb_tcp_connection *tcon;
85 tcon = &tcparray->connections[i];
86 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
88 ctdb_addr_to_str(&tcon->src_addr),
89 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90 ret = ctdb_sys_send_tcp(
95 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96 ctdb_addr_to_str(&tcon->src_addr)));
103 if (arp->count == CTDB_ARP_REPEAT) {
/* the event is parented to takeover_ctx, so freeing that context
   (as ctdb_control_release_ip does) cancels the pending run */
108 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
109 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
110 ctdb_control_send_arp, arp);
/*
  Context handed to the takeip and releaseip event-script callbacks:
  the control request to answer (c), the address being moved (addr) and
  the vnn entry the address belongs to (vnn).
 */
113 struct takeover_callback_state {
114 struct ctdb_req_control *c;
115 ctdb_sock_addr *addr;
116 struct ctdb_vnn *vnn;
120 called when takeip event finishes
/*
  Completion callback for the event script started by
  ctdb_control_takeover_ip; private_data is the takeover_callback_state.

  The error branch logs the failure and answers the control with the
  script status.  Otherwise a ctdb_takeover_arp is allocated on the
  vnn's takeover_ctx (which is created on demand), the vnn's tcp_array
  is handed over to it, ctdb_control_send_arp is scheduled with a zero
  timeout and the control is answered with 0.  The final reply with -1
  presumably belongs to the "failed" label targeted by the goto below.
 */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
125 struct takeover_callback_state *state =
126 talloc_get_type(private_data, struct takeover_callback_state);
127 struct ctdb_takeover_arp *arp;
128 struct ctdb_tcp_array *tcparray;
131 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132 ctdb_addr_to_str(state->addr),
134 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
139 if (!state->vnn->takeover_ctx) {
140 state->vnn->takeover_ctx = talloc_new(ctdb);
141 if (!state->vnn->takeover_ctx) {
146 arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147 if (!arp) goto failed;
150 arp->addr = *state->addr;
151 arp->vnn = state->vnn;
153 tcparray = state->vnn->tcp_array;
155 /* add all of the known tcp connections for this IP to the
156 list of tcp connections to send tickle acks for */
157 arp->tcparray = talloc_steal(arp, tcparray);
/* the array now belongs to arp, so the vnn must stop pointing at it */
159 state->vnn->tcp_array = NULL;
160 state->vnn->tcp_update_needed = true;
163 event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx,
164 timeval_zero(), ctdb_control_send_arp, arp);
166 /* the control succeeded */
167 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
172 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
178 Find the vnn of the node that has a public ip address
179 returns -1 if the address is not known as a public address
/*
  Walk the ctdb->vnn list looking for the entry whose public_address
  matches addr according to ctdb_same_ip.

  NOTE(review): the existing description of this function speaks of
  returning -1 for an unknown address, but the return type is a
  pointer - confirm what callers should compare the result against.
 */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
183 struct ctdb_vnn *vnn;
185 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186 if (ctdb_same_ip(&vnn->public_address, addr)) {
196 take over an ip address
/*
  Control handler: take over the public address carried in indata,
  which is read as a struct ctdb_public_ip.

  The address is looked up with find_public_ip_vnn; the error branch
  logs that it is not a public address.  If ctdb_sys_have_ip reports
  that the kernel already holds the address nothing more is needed.
  Otherwise a takeover_callback_state is allocated, the request c is
  kept for the later reply, and the event script is started with
  takeover_ip_callback as its completion callback and the
  script_timeout tunable as its timeout.  The reply is sent
  asynchronously from that callback.

  NOTE(review): state->c and state->addr are parented to ctdb here,
  while ctdb_control_release_ip parents both to state.  Confirm that
  they are released when state goes away.
 */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
199 struct ctdb_req_control *c,
204 struct takeover_callback_state *state;
205 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206 struct ctdb_vnn *vnn;
208 /* update our vnn list */
209 vnn = find_public_ip_vnn(ctdb, &pip->addr);
211 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
212 ctdb_addr_to_str(&pip->addr)));
217 /* if our kernel already has this IP, do nothing */
218 if (ctdb_sys_have_ip(&pip->addr)) {
222 state = talloc(ctdb, struct takeover_callback_state);
223 CTDB_NO_MEMORY(ctdb, state);
225 state->c = talloc_steal(ctdb, c);
226 state->addr = talloc(ctdb, ctdb_sock_addr);
227 CTDB_NO_MEMORY(ctdb, state->addr);
229 *state->addr = pip->addr;
232 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
233 ctdb_addr_to_str(&pip->addr),
234 vnn->public_netmask_bits,
237 ret = ctdb_event_script_callback(ctdb,
238 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
239 state, takeover_ip_callback, state,
242 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243 vnn->public_netmask_bits);
246 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247 ctdb_addr_to_str(&pip->addr),
253 /* tell ctdb_control.c that we will be replying asynchronously */
260 takeover an ip address old v4 style
/*
  Compatibility entry point for the old IPv4-only control.  The caller's
  payload is copied into a zeroed struct ctdb_public_ip allocated on c
  and then handed to ctdb_control_takeover_ip.

  NOTE(review): the memcpy copies indata.dsize bytes into a buffer of
  sizeof(struct ctdb_public_ip), and no length check is visible here.
  Confirm that indata.dsize is validated before this is reached and
  that the old payload layout is a prefix of struct ctdb_public_ip.
 */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
263 struct ctdb_req_control *c,
269 data.dsize = sizeof(struct ctdb_public_ip);
270 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271 CTDB_NO_MEMORY(ctdb, data.dptr);
273 memcpy(data.dptr, indata.dptr, indata.dsize);
274 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
278 kill any clients that are registered with an IP that is being released
/*
  Walk ctdb->client_ip_list and, for every entry that ctdb_same_ip
  matches against addr, look up the owning ctdb_client with
  ctdb_reqid_find.  A client whose pid is non-zero is sent SIGKILL.
  Each step is traced at DEBUG_INFO.
 */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 struct ctdb_client_ip *ip;
284 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285 ctdb_addr_to_str(addr)));
287 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288 ctdb_sock_addr tmp_addr;
291 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
293 ctdb_addr_to_str(&ip->addr)));
295 if (ctdb_same_ip(&tmp_addr, addr)) {
296 struct ctdb_client *client = ctdb_reqid_find(ctdb,
299 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
301 ctdb_addr_to_str(&ip->addr),
304 if (client->pid != 0) {
305 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306 (unsigned)client->pid,
307 ctdb_addr_to_str(addr),
309 kill(client->pid, SIGKILL);
316 called when releaseip event finishes
/*
  Completion callback for the event script started by
  ctdb_control_release_ip; private_data is the takeover_callback_state.

  The address is rendered as a string and sent (including its
  terminating NUL) to this node with srvid CTDB_SRVID_RELEASE_IP, the
  clients registered for the address are killed through
  release_kill_clients, and the control is answered with 0.

  NOTE(review): the talloc_strdup result is passed to strlen without a
  NULL check.
 */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status,
321 struct takeover_callback_state *state =
322 talloc_get_type(private_data, struct takeover_callback_state);
325 /* send a message to all clients of this node telling them
326 that the cluster has been reconfigured and they should
327 release any sockets on this IP */
328 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329 data.dsize = strlen((char *)data.dptr)+1;
331 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
333 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
335 /* kill clients that have registered with this IP */
336 release_kill_clients(ctdb, state->addr);
338 /* the control succeeded */
339 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
344 release an ip address
/*
  Control handler: release the public address carried in indata, which
  is read as a struct ctdb_public_ip.

  After the vnn lookup the vnn's takeover_ctx is freed, which stops any
  ARP/tickle runs still scheduled on it.  If ctdb_sys_have_ip reports
  that the address is not held, the release is logged as redundant.
  Otherwise a takeover_callback_state is allocated (owning both the
  request c and the address copy), and the "releaseip" event script is
  started with release_ip_callback as its completion callback.  The
  reply is sent asynchronously from that callback.

  NOTE(review): the not-a-public-address log line below says
  "takeoverip" although this is the release path - looks copied from
  ctdb_control_takeover_ip.
 */
346 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
347 struct ctdb_req_control *c,
352 struct takeover_callback_state *state;
353 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
354 struct ctdb_vnn *vnn;
356 /* update our vnn list */
357 vnn = find_public_ip_vnn(ctdb, &pip->addr);
359 DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
360 ctdb_addr_to_str(&pip->addr)));
365 /* stop any previous arps */
366 talloc_free(vnn->takeover_ctx);
367 vnn->takeover_ctx = NULL;
369 if (!ctdb_sys_have_ip(&pip->addr)) {
370 DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
371 ctdb_addr_to_str(&pip->addr),
372 vnn->public_netmask_bits,
377 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n",
378 ctdb_addr_to_str(&pip->addr),
379 vnn->public_netmask_bits,
382 state = talloc(ctdb, struct takeover_callback_state);
383 CTDB_NO_MEMORY(ctdb, state);
385 state->c = talloc_steal(state, c);
386 state->addr = talloc(state, ctdb_sock_addr);
387 CTDB_NO_MEMORY(ctdb, state->addr);
388 *state->addr = pip->addr;
391 ret = ctdb_event_script_callback(ctdb,
392 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
393 state, release_ip_callback, state,
394 "releaseip %s %s %u",
396 talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
397 vnn->public_netmask_bits);
399 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
400 ctdb_addr_to_str(&pip->addr),
406 /* tell the control that we will be replying asynchronously */
412 release an ip address old v4 style
/*
  Compatibility entry point for the old IPv4-only control.  The caller's
  payload is copied into a zeroed struct ctdb_public_ip allocated on c
  and then handed to ctdb_control_release_ip.

  NOTE(review): the memcpy copies indata.dsize bytes into a buffer of
  sizeof(struct ctdb_public_ip), and no length check is visible here.
  Confirm that indata.dsize is validated before this is reached.
 */
414 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
415 struct ctdb_req_control *c,
421 data.dsize = sizeof(struct ctdb_public_ip);
422 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
423 CTDB_NO_MEMORY(ctdb, data.dptr);
425 memcpy(data.dptr, indata.dptr, indata.dsize);
426 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Register one public address with this daemon.

  addr/mask are the address and its netmask bits, iface the interface
  name (copied).  An address already on ctdb->vnn (compared with
  ctdb_same_sockaddr) is reported at DEBUG_CRIT as a duplicate.
  Otherwise a zeroed ctdb_vnn is allocated on ctdb, filled in and added
  to the ctdb->vnn list.
 */
430 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
432 struct ctdb_vnn *vnn;
434 /* Verify that we don't have an entry for this ip yet */
435 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
436 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
437 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
438 ctdb_addr_to_str(addr)));
443 /* create a new vnn structure for this ip address */
444 vnn = talloc_zero(ctdb, struct ctdb_vnn);
445 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
/* NOTE(review): the result of this strdup is not checked */
446 vnn->iface = talloc_strdup(vnn, iface);
447 vnn->public_address = *addr;
448 vnn->public_netmask_bits = mask;
451 DLIST_ADD(ctdb->vnn, vnn);
458 setup the event script directory
/*
  Store a copy of script_dir, allocated on ctdb, in
  ctdb->event_script_dir.  A failed allocation is handled by
  CTDB_NO_MEMORY.
 */
460 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
462 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
463 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
468 setup the public address lists from a file
/*
  Load the public address list from the file named by alist.

  The file is read with file_lines_load.  Empty lines at the end of the
  file and leading blanks/tabs on each line are examined so that blank
  input is not treated as an address.  On each remaining line the first
  whitespace-separated token is parsed by parse_ip_mask into an address
  and mask.  The second token is the interface; the line numbered 509
  and onward shows ctdb->default_public_interface being used as the
  fallback, and an error being logged when that is NULL as well.  Every
  parsed entry is registered through ctdb_add_public_address.

  Line numbers in the log messages are 1-based (i+1).  strtok is used
  for tokenising, so each line buffer is modified in place.
 */
470 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
476 lines = file_lines_load(alist, &nlines, ctdb);
478 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
481 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
485 for (i=0;i<nlines;i++) {
492 while ((*line == ' ') || (*line == '\t')) {
498 if (strcmp(line, "") == 0) {
501 tok = strtok(line, " \t");
502 if (!tok || !parse_ip_mask(tok, &addr, &mask)) {
503 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
507 tok = strtok(NULL, " \t");
509 if (NULL == ctdb->default_public_interface) {
510 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
515 iface = ctdb->default_public_interface;
520 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
521 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Singly linked node of the merged, cluster-wide public address list
   built by create_merged_ip_list and consumed by ctdb_takeover_run. */
534 struct ctdb_public_ip_list {
535 struct ctdb_public_ip_list *next;
541 /* Given a physical node, return the number of
542 public addresses that are currently assigned to this node.
/*
  Walk the ips list and test each entry's pnn against the requested
  pnn.  Callers use the result as the number of addresses currently
  assigned to that node.
 */
544 static int node_ip_coverage(struct ctdb_context *ctdb,
546 struct ctdb_public_ip_list *ips)
550 for (;ips;ips=ips->next) {
551 if (ips->pnn == pnn) {
559 /* Check if this is a public ip known to the node, i.e. can that
560 node takeover this ip ?
/*
  Check whether the address in ip appears in the public_ips list that
  ctdb->nodes[pnn] advertises (compared with ctdb_same_ip).  A node with
  no public_ips list is handled separately before the search.

  Despite the name, callers in this file treat a non-zero result as
  "this node can NOT serve the ip" and zero as "it can".
 */
562 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
563 struct ctdb_public_ip_list *ip)
565 struct ctdb_all_public_ips *public_ips;
568 public_ips = ctdb->nodes[pnn]->public_ips;
570 if (public_ips == NULL) {
574 for (i=0;i<public_ips->num;i++) {
575 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
576 /* yes, this node can serve this public ip */
585 /* search the node list for a node to take over this ip.
586 pick the node that is currently serving the fewest ips
587 so that the ips get spread out evenly.
/*
  Choose a node to host the address in ip.

  Nodes whose flags intersect mask are passed over, as are nodes for
  which can_node_serve_ip returns non-zero.  For each remaining node the
  current load is obtained with node_ip_coverage over all_ips.  When no
  node qualifies a warning naming the address is logged.  The caller
  (ctdb_takeover_run) treats a non-zero return as failure.
 */
589 static int find_takeover_node(struct ctdb_context *ctdb,
590 struct ctdb_node_map *nodemap, uint32_t mask,
591 struct ctdb_public_ip_list *ip,
592 struct ctdb_public_ip_list *all_ips)
598 for (i=0;i<nodemap->num;i++) {
599 if (nodemap->nodes[i].flags & mask) {
600 /* This node is not healthy and can not be used to serve
606 /* verify that this node can serve this ip */
607 if (can_node_serve_ip(ctdb, i, ip)) {
608 /* no it couldn't so skip to the next node */
612 num = node_ip_coverage(ctdb, i, all_ips);
613 /* was this the first node we checked ? */
625 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
626 ctdb_addr_to_str(&ip->addr)));
/*
  Make sure the address in ip is present in the merged list ip_list.

  The list is searched with ctdb_same_ip first.  An address that is not
  yet present gets a new zeroed node allocated on tmp_ctx, carrying the
  pnn and address from ip and linked in front of ip_list.
 */
635 struct ctdb_public_ip_list *
636 add_ip_to_merged_list(struct ctdb_context *ctdb,
638 struct ctdb_public_ip_list *ip_list,
639 struct ctdb_public_ip *ip)
641 struct ctdb_public_ip_list *tmp_ip;
643 /* do we already have this ip in our merged list ?*/
644 for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
646 /* we already have this public ip in the list */
647 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
652 /* this is a new public ip, we must add it to the list */
653 tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
654 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
655 tmp_ip->pnn = ip->pnn;
656 tmp_ip->addr = ip->addr;
657 tmp_ip->next = ip_list;
/*
  Build the cluster-wide list of public addresses by feeding the
  public_ips entries of every node in ctdb->nodes through
  add_ip_to_merged_list.  Nodes without a public_ips list contribute
  nothing.  The list nodes are allocated on tmp_ctx.
 */
662 struct ctdb_public_ip_list *
663 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
666 struct ctdb_public_ip_list *ip_list = NULL;
667 struct ctdb_all_public_ips *public_ips;
669 for (i=0;i<ctdb->num_nodes;i++) {
670 public_ips = ctdb->nodes[i]->public_ips;
672 /* there were no public ips for this node */
673 if (public_ips == NULL) {
677 for (j=0;j<public_ips->num;j++) {
678 ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
679 ip_list, &public_ips->ips[j]);
687 make any IP alias changes for public addresses that are necessary
/*
  Recompute which node should host every public address in the cluster
  and push the result out to the nodes.

  The visible steps are:
   - count the nodes that carry neither NODE_FLAGS_INACTIVE nor
     NODE_FLAGS_DISABLED and choose the flag mask used to exclude nodes
     from hosting addresses;
   - build the merged address list with create_merged_ip_list on a
     temporary talloc context;
   - with the deterministic_public_ips tunable set to 1, reassign every
     address to pnn = list index % nodemap->num;
   - re-examine addresses whose node is masked out or fails
     can_node_serve_ip, and call find_takeover_node for every address
     whose pnn is -1;
   - unless prevented by the tunables, even out the load by comparing
     node_ip_coverage among the nodes able to serve each address;
   - send CTDB_CONTROL_RELEASE_IP (CTDB_CONTROL_RELEASE_IPv4 for AF_INET
     addresses) to every node not flagged NODE_FLAGS_DISCONNECTED for
     each address that node should not hold, and wait for completion;
   - send CTDB_CONTROL_TAKEOVER_IP (or the IPv4 variant) to the chosen
     owner of every assigned address, and wait again.

  tmp_ctx is freed on each visible error path and at the end.

  NOTE(review): "i%nodemap->num" divides by nodemap->num - confirm that
  callers never pass an empty node map.
 */
689 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
691 int i, num_healthy, retries;
692 struct ctdb_public_ip ip;
693 struct ctdb_public_ipv4 ipv4;
695 struct ctdb_public_ip_list *all_ips, *tmp_ip;
696 int maxnode, maxnum=0, minnode, minnum=0, num;
698 struct timeval timeout;
699 struct client_async_data *async_data;
700 struct ctdb_client_control_state *state;
701 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
706 /* Count how many completely healthy nodes we have */
708 for (i=0;i<nodemap->num;i++) {
709 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
714 if (num_healthy > 0) {
715 /* We have healthy nodes, so only consider them for
716 serving public addresses
718 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
720 /* We didnt have any completely healthy nodes so
721 use "disabled" nodes as a fallback
723 mask = NODE_FLAGS_INACTIVE;
726 /* since nodes only know about those public addresses that
727 can be served by that particular node, no single node has
728 a full list of all public addresses that exist in the cluster.
729 Walk over all node structures and create a merged list of
730 all public addresses that exist in the cluster.
732 all_ips = create_merged_ip_list(ctdb, tmp_ctx);
734 /* If we want deterministic ip allocations, i.e. that the ip addresses
735 will always be allocated the same way for a specific set of
736 available/unavailable nodes.
738 if (1 == ctdb->tunable.deterministic_public_ips) {
739 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
740 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
741 tmp_ip->pnn = i%nodemap->num;
746 /* mark all public addresses with a masked node as being served by
749 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
750 if (tmp_ip->pnn == -1) {
753 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
758 /* verify that the assigned nodes can serve that public ip
759 and set it to -1 if not
761 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
762 if (tmp_ip->pnn == -1) {
765 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
766 /* this node can not serve this ip. */
772 /* now we must redistribute all public addresses with takeover node
773 -1 among the nodes available
777 /* loop over all ip's and find a physical node to cover for
780 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
781 if (tmp_ip->pnn == -1) {
782 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
783 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
784 ctdb_addr_to_str(&tmp_ip->addr)));
789 /* If we dont want ips to fail back after a node becomes healthy
790 again, we wont even try to reallocate the ip addresses so that
791 they are evenly spread out.
792 This can NOT be used at the same time as DeterministicIPs !
794 if (1 == ctdb->tunable.no_ip_failback) {
795 if (1 == ctdb->tunable.deterministic_public_ips) {
796 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
802 /* now, try to make sure the ip addresses are evenly distributed
804 for each ip address, loop over all nodes that can serve this
805 ip and make sure that the difference between the node
806 serving the most and the node serving the least ip's are not greater
809 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
810 if (tmp_ip->pnn == -1) {
814 /* Get the highest and lowest number of ips's served by any
815 valid node which can serve this ip.
819 for (i=0;i<nodemap->num;i++) {
820 if (nodemap->nodes[i].flags & mask) {
824 /* only check nodes that can actually serve this ip */
825 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
826 /* no it couldnt so skip to the next node */
830 num = node_ip_coverage(ctdb, i, all_ips);
851 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
852 ctdb_addr_to_str(&tmp_ip->addr)));
857 /* If we want deterministic IPs then dont try to reallocate
858 them to spread out the load.
860 if (1 == ctdb->tunable.deterministic_public_ips) {
864 /* if the spread between the smallest and largest coverage by
865 a node is >=2 we steal one of the ips from the node with
866 most coverage to even things out a bit.
867 try to do this at most 5 times since we dont want to spend
868 too much time balancing the ip coverage.
870 if ( (maxnum > minnum+1)
872 struct ctdb_public_ip_list *tmp;
874 /* mark one of maxnode's vnn's as unassigned and try
877 for (tmp=all_ips;tmp;tmp=tmp->next) {
878 if (tmp->pnn == maxnode) {
888 /* finished distributing the public addresses, now just send the
889 info out to the nodes
893 /* at this point ->pnn is the node which will own each IP
894 or -1 if there is no node that can cover this ip
897 /* now tell all nodes to delete any alias that they should not
898 have. This will be a NOOP on nodes that don't currently
899 hold the given alias */
900 async_data = talloc_zero(tmp_ctx, struct client_async_data);
901 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
903 for (i=0;i<nodemap->num;i++) {
904 /* don't talk to unconnected nodes, but do talk to banned nodes */
905 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
909 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
910 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
911 /* This node should be serving this
912 vnn so dont tell it to release the ip
916 if (tmp_ip->addr.sa.sa_family == AF_INET) {
917 ipv4.pnn = tmp_ip->pnn;
918 ipv4.sin = tmp_ip->addr.ip;
920 timeout = TAKEOVER_TIMEOUT();
921 data.dsize = sizeof(ipv4);
922 data.dptr = (uint8_t *)&ipv4;
923 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
924 0, CTDB_CONTROL_RELEASE_IPv4, 0,
928 ip.pnn = tmp_ip->pnn;
929 ip.addr = tmp_ip->addr;
931 timeout = TAKEOVER_TIMEOUT();
932 data.dsize = sizeof(ip);
933 data.dptr = (uint8_t *)&ip;
934 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
935 0, CTDB_CONTROL_RELEASE_IP, 0,
941 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
942 talloc_free(tmp_ctx);
946 ctdb_client_async_add(async_data, state);
949 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
950 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
951 talloc_free(tmp_ctx);
954 talloc_free(async_data);
957 /* tell all nodes to get their own IPs */
958 async_data = talloc_zero(tmp_ctx, struct client_async_data);
959 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
960 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
961 if (tmp_ip->pnn == -1) {
962 /* this IP won't be taken over */
966 if (tmp_ip->addr.sa.sa_family == AF_INET) {
967 ipv4.pnn = tmp_ip->pnn;
968 ipv4.sin = tmp_ip->addr.ip;
970 timeout = TAKEOVER_TIMEOUT();
971 data.dsize = sizeof(ipv4);
972 data.dptr = (uint8_t *)&ipv4;
973 state = ctdb_control_send(ctdb, tmp_ip->pnn,
974 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
978 ip.pnn = tmp_ip->pnn;
979 ip.addr = tmp_ip->addr;
981 timeout = TAKEOVER_TIMEOUT();
982 data.dsize = sizeof(ip);
983 data.dptr = (uint8_t *)&ip;
984 state = ctdb_control_send(ctdb, tmp_ip->pnn,
985 0, CTDB_CONTROL_TAKEOVER_IP, 0,
990 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
991 talloc_free(tmp_ctx);
995 ctdb_client_async_add(async_data, state);
997 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
998 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
999 talloc_free(tmp_ctx);
1003 talloc_free(tmp_ctx);
1009 destroy a ctdb_client_ip structure
/*
  talloc destructor installed by ctdb_control_tcp_client: logs the
  address and port of the entry and unlinks it from the owning
  context's client_ip_list.
 */
1011 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1013 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1014 ctdb_addr_to_str(&ip->addr),
1015 ntohs(ip->addr.ip.sin_port),
1018 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1023 called by a client to inform us of a TCP connection that it is managing
1024 that should be tickled with an ACK when IP takeover is done
1026 //qqq we need a new version of this control that takes ctdb_sock_addr
1027 //and have samba move to that instead.
1028 // This is IPV4 ONLY
/*
  Control handler: a local client registers one of its TCP connections
  (indata is read as a struct ctdb_control_tcp, IPv4 only).

  The destination address is looked up with find_public_ip_vnn.  An
  address that is not public is logged unless it is the loopback
  address.  An address this node does not hold (vnn->pnn differs from
  ctdb->pnn) is rejected, which tells smbd to die.  Otherwise:
   - a ctdb_client_ip is allocated on the client, given
     ctdb_client_ip_destructor and added to ctdb->client_ip_list;
   - a ctdb_tcp_list entry with the source and destination is allocated
     on the client and added to client->tcp_list;
   - CTDB_CONTROL_TCP_ADD is broadcast to all connected nodes with
     CTDB_CTRL_FLAG_NOREPLY.

  NOTE(review): client comes from ctdb_reqid_find and is dereferenced
  (client->pid, client->tcp_list) with no NULL check visible here.
 */
1029 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1032 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1033 struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
1034 struct ctdb_tcp_list *tcp;
1035 struct ctdb_control_tcp_vnn t;
1038 struct ctdb_client_ip *ip;
1039 struct ctdb_vnn *vnn;
1040 ctdb_sock_addr addr;
1044 vnn = find_public_ip_vnn(ctdb, &addr);
1046 if (ntohl(p->dest.sin_addr.s_addr) != INADDR_LOOPBACK) {
1047 DEBUG(DEBUG_INFO,("Could not add client IP %s. This is not a public address.\n",
1048 ctdb_addr_to_str((ctdb_sock_addr *)&p->dest)));
1053 if (vnn->pnn != ctdb->pnn) {
1054 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1055 ctdb_addr_to_str((ctdb_sock_addr *)&p->dest),
1056 client_id, client->pid));
1057 /* failing this call will tell smbd to die */
1061 ip = talloc(client, struct ctdb_client_ip);
1062 CTDB_NO_MEMORY(ctdb, ip);
1065 ip->addr.ip = p->dest;
1066 ip->client_id = client_id;
1067 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1068 DLIST_ADD(ctdb->client_ip_list, ip);
1070 tcp = talloc(client, struct ctdb_tcp_list);
1071 CTDB_NO_MEMORY(ctdb, tcp);
1073 tcp->connection.src_addr.ip = p->src;
1074 tcp->connection.dst_addr.ip = p->dest;
1076 DLIST_ADD(client->tcp_list, tcp);
1079 t.dest.ip = p->dest;
1081 data.dptr = (uint8_t *)&t;
1082 data.dsize = sizeof(t);
1084 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1085 (unsigned)ntohs(p->dest.sin_port),
1086 ctdb_addr_to_str((ctdb_sock_addr *)&p->src),
1087 (unsigned)ntohs(p->src.sin_port), client_id, client->pid));
1089 /* tell all nodes about this tcp connection */
1090 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1091 CTDB_CONTROL_TCP_ADD,
1092 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1094 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1102 find a tcp address on a list
/*
  Search array for a connection whose source and destination both match
  tcp according to ctdb_same_sockaddr, and return a pointer to that
  array slot.  A NULL array is accepted and handled before the search.
 */
1104 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1105 struct ctdb_tcp_connection *tcp)
1109 if (array == NULL) {
1113 for (i=0;i<array->num;i++) {
1114 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1115 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1116 return &array->connections[i];
1123 called by a daemon to inform us of a TCP connection that one of its
1124 clients is managing, which should be tickled with an ACK when IP takeover is
/*
  Control handler: record a TCP connection (indata is read as a struct
  ctdb_control_tcp_vnn) in the tickle array of the vnn that owns its
  destination address.

  A destination that is not a public address is logged at DEBUG_INFO.
  If the vnn has no array yet, one is allocated together with a
  one-element connections array and the connection is stored in it.
  Otherwise the array is searched with ctdb_tcp_find; a connection that
  is already known is only logged, and a new one is appended after
  growing the connections array with talloc_realloc.

  NOTE(review): the talloc_realloc result is stored straight into
  tcparray->connections, so a failed realloc leaves that pointer NULL
  on an array the vnn still references.  Confirm that later readers
  cope with that.
 */
1127 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1129 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1130 struct ctdb_tcp_array *tcparray;
1131 struct ctdb_tcp_connection tcp;
1132 struct ctdb_vnn *vnn;
1134 vnn = find_public_ip_vnn(ctdb, &p->dest);
1136 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1137 ctdb_addr_to_str(&p->dest)));
1143 tcparray = vnn->tcp_array;
1145 /* If this is the first tickle */
1146 if (tcparray == NULL) {
1147 tcparray = talloc_size(ctdb->nodes,
1148 offsetof(struct ctdb_tcp_array, connections) +
1149 sizeof(struct ctdb_tcp_connection) * 1);
1150 CTDB_NO_MEMORY(ctdb, tcparray);
1151 vnn->tcp_array = tcparray;
1154 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1155 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1157 tcparray->connections[tcparray->num].src_addr = p->src;
1158 tcparray->connections[tcparray->num].dst_addr = p->dest;
1164 /* Do we already have this tickle ?*/
1165 tcp.src_addr = p->src;
1166 tcp.dst_addr = p->dest;
1167 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1168 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1169 ctdb_addr_to_str(&tcp.dst_addr),
1170 ntohs(tcp.dst_addr.ip.sin_port),
1175 /* A new tickle, we must add it to the array */
1176 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1177 struct ctdb_tcp_connection,
1179 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1181 vnn->tcp_array = tcparray;
1182 tcparray->connections[tcparray->num].src_addr = p->src;
1183 tcparray->connections[tcparray->num].dst_addr = p->dest;
1186 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1187 ctdb_addr_to_str(&tcp.dst_addr),
1188 ntohs(tcp.dst_addr.ip.sin_port),
1196 called by a daemon to inform us of a TCP connection that one of its
1197 clients managing that should tickled with an ACK when IP takeover is
/*
  Remove one connection from the tickle array of the vnn that owns the
  connection's destination address.

  An unknown public address is logged at DEBUG_ERR.  An empty array, or
  a connection ctdb_tcp_find does not know, is logged at DEBUG_INFO.
  A known entry is overwritten with the last array element and the
  element count is decremented, so the order of the remaining entries
  is not preserved.  When the count reaches zero the whole array is
  freed and vnn->tcp_array is cleared.  tcp_update_needed is set on the
  vnn.
 */
1200 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1202 struct ctdb_tcp_connection *tcpp;
1203 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1206 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1207 ctdb_addr_to_str(&conn->dst_addr)));
1211 /* if the array is empty we cant remove it
1212 and we dont need to do anything
1214 if (vnn->tcp_array == NULL) {
1215 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1216 ctdb_addr_to_str(&conn->dst_addr),
1217 ntohs(conn->dst_addr.ip.sin_port)));
1222 /* See if we know this connection
1223 if we dont know this connection then we dont need to do anything
1225 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1227 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1228 ctdb_addr_to_str(&conn->dst_addr),
1229 ntohs(conn->dst_addr.ip.sin_port)));
1234 /* We need to remove this entry from the array.
1235 Instead of allocating a new array and copying data to it
1236 we cheat and just copy the last entry in the existing array
1237 to the entry that is to be removed and just shring the
1240 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1241 vnn->tcp_array->num--;
1243 /* If we deleted the last entry we also need to remove the entire array
1245 if (vnn->tcp_array->num == 0) {
1246 talloc_free(vnn->tcp_array);
1247 vnn->tcp_array = NULL;
1250 vnn->tcp_update_needed = true;
1252 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1253 ctdb_addr_to_str(&conn->src_addr),
1254 ntohs(conn->src_addr.ip.sin_port)));
1259 called when a daemon restarts - send all tickles for all public addresses
1260 we are serving immediately to the new node.
/*
  Control handler invoked for a node (vnn) that has started up.  The
  visible body is only a reminder that forwarding the tickle lists to
  the new node is still to be implemented.
 */
1262 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1264 /*XXX here we should send all tickles we are serving to the new node */
1270 called when a client structure goes away - hook to remove
1271 elements from the tcp_list in all daemons
/*
  Drain client->tcp_list: each entry is unlinked from the list and its
  connection is removed from the owning vnn's tickle array through
  ctdb_remove_tcp_connection.
 */
1273 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1275 while (client->tcp_list) {
1276 struct ctdb_tcp_list *tcp = client->tcp_list;
1277 DLIST_REMOVE(client->tcp_list, tcp);
1278 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1284 release all IPs on shutdown
/*
  Walk every vnn on ctdb->vnn.  Addresses the kernel does not hold
  (ctdb_sys_have_ip) get separate handling; ownership by this node
  (vnn->pnn == ctdb->pnn) is tested as well.  For the remaining
  addresses the "releaseip" event script is run through
  ctdb_event_script and the clients registered for the address are
  killed with release_kill_clients.

  The address string passed to the script is strdup'ed onto ctdb and is
  not freed here.
 */
1286 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1288 struct ctdb_vnn *vnn;
1290 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1291 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1294 if (vnn->pnn == ctdb->pnn) {
1297 ctdb_event_script(ctdb, "releaseip %s %s %u",
1299 talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1300 vnn->public_netmask_bits);
1301 release_kill_clients(ctdb, &vnn->public_address);
1307 get list of public IPs
/*
  Control handler: return this node's public addresses.

  The vnn list is walked once to size the reply and once to fill it.
  The reply is a struct ctdb_all_public_ips followed by one
  ctdb_public_ip (pnn and address) per vnn, allocated zeroed with
  outdata as talloc parent and handed back through outdata->dptr and
  outdata->dsize.
 */
1309 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1310 struct ctdb_req_control *c, TDB_DATA *outdata)
1313 struct ctdb_all_public_ips *ips;
1314 struct ctdb_vnn *vnn;
1316 /* count how many public ip structures we have */
1318 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1322 len = offsetof(struct ctdb_all_public_ips, ips) +
1323 num*sizeof(struct ctdb_public_ip);
1324 ips = talloc_zero_size(outdata, len);
1325 CTDB_NO_MEMORY(ctdb, ips);
1327 outdata->dsize = len;
1328 outdata->dptr = (uint8_t *)ips;
1332 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1333 ips->ips[i].pnn = vnn->pnn;
1334 ips->ips[i].addr = vnn->public_address;
1343 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Control handler: old-style variant of ctdb_control_get_public_ips.

  Both passes over the vnn list test the address family against
  AF_INET, so that only IPv4 addresses are counted and copied.  The
  reply is a struct ctdb_all_public_ipsv4 followed by one
  ctdb_public_ipv4 (pnn and sockaddr_in) per address, allocated zeroed
  with outdata as talloc parent.
 */
1345 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1346 struct ctdb_req_control *c, TDB_DATA *outdata)
1349 struct ctdb_all_public_ipsv4 *ips;
1350 struct ctdb_vnn *vnn;
1352 /* count how many public ip structures we have */
1354 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1355 if (vnn->public_address.sa.sa_family != AF_INET) {
1361 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1362 num*sizeof(struct ctdb_public_ipv4);
1363 ips = talloc_zero_size(outdata, len);
1364 CTDB_NO_MEMORY(ctdb, ips);
1366 outdata->dsize = len;
1367 outdata->dptr = (uint8_t *)ips;
1371 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1372 if (vnn->public_address.sa.sa_family != AF_INET) {
1375 ips->ips[i].pnn = vnn->pnn;
1376 ips->ips[i].sin = vnn->public_address.ip;
1385 structure containing the listening socket and the list of tcp connections
1386 that the ctdb daemon is to kill
/*
  Per-vnn state for killing tcp connections: fde is the fd event for
  the capture socket read by capture_tcp_handler, and connections is a
  red-black tree of the connections to kill, keyed by killtcp_key.
 */
1388 struct ctdb_kill_tcp {
1389 struct ctdb_vnn *vnn;
1390 struct ctdb_context *ctdb;
1392 struct fd_event *fde;
1393 trbt_tree_t *connections;
1398 a tcp connection that is to be killed
/* One connection queued for killing: its two endpoints and a back
   pointer to the ctdb_kill_tcp it belongs to. */
1400 struct ctdb_killtcp_con {
1401 ctdb_sock_addr src_addr;
1402 ctdb_sock_addr dst_addr;
1404 struct ctdb_kill_tcp *killtcp;
1407 /* this function is used to create a key to represent this socketpair
1408 in the killtcp tree.
1409 this key is used to insert and lookup matching socketpairs that are
1410 to be tickled and RST
/*
  Build the KILLTCP_KEYLEN-word tree key for a src/dst socket pair.

  The key lives in a function-local static array that is zeroed on every
  call, so any pointer to it is only meaningful until the next call.
  For the IPv4 members (ip.sin_*) words 0-3 hold dst addr, src addr,
  dst port, src port and the rest stay zero.  For the IPv6 members
  (ip6.sin6_*) words 0-7 hold the four 32-bit address words of dst and src
  interleaved, highest index first, and words 8-9 hold the ports.
  A family mismatch between src and dst, or an unhandled family, is logged
  at DEBUG_ERR.
  NOTE(review): the case labels and return statements are not visible here;
  presumably the static array is what gets returned - confirm.
 */
1412 #define KILLTCP_KEYLEN 10
1413 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1415 static uint32_t key[KILLTCP_KEYLEN];
1417 bzero(key, sizeof(key));
1419 if (src->sa.sa_family != dst->sa.sa_family) {
1420 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1424 switch (src->sa.sa_family) {
1426 key[0] = dst->ip.sin_addr.s_addr;
1427 key[1] = src->ip.sin_addr.s_addr;
1428 key[2] = dst->ip.sin_port;
1429 key[3] = src->ip.sin_port;
1432 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
1433 key[1] = src->ip6.sin6_addr.s6_addr32[3];
1434 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
1435 key[3] = src->ip6.sin6_addr.s6_addr32[2];
1436 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
1437 key[5] = src->ip6.sin6_addr.s6_addr32[1];
1438 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
1439 key[7] = src->ip6.sin6_addr.s6_addr32[0];
1440 key[8] = dst->ip6.sin6_port;
1441 key[9] = src->ip6.sin6_port;
1444 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1452 called when we get a read event on the raw socket
/*
  Fd event callback for the capture socket; private_data is the
  struct ctdb_kill_tcp.

  Only events carrying EVENT_FD_READ are examined.  A packet is read with
  ctdb_sys_read_tcp_packet, which also yields ack_seq and seq, and the
  packet's src/dst pair is looked up in killtcp->connections using
  killtcp_key(&src, &dst).  For a match, ctdb_sys_send_tcp is called with
  the stored dst_addr/src_addr, the captured ack_seq/seq and a final
  argument of 1.
  NOTE(review): per the log message the final argument requests a TCP
  reset; the removal of the entry from the tree is not visible here.
 */
1454 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
1455 uint16_t flags, void *private_data)
1457 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1458 struct ctdb_killtcp_con *con;
1459 ctdb_sock_addr src, dst;
1460 uint32_t ack_seq, seq;
1462 if (!(flags & EVENT_FD_READ)) {
1466 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1467 killtcp->private_data,
1469 &ack_seq, &seq) != 0) {
1470 /* probably a non-tcp ACK packet */
1474 /* check if we have this guy in our list of connections
1477 con = trbt_lookuparray32(killtcp->connections,
1478 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1480 /* no this was some other packet we can just ignore */
1484 /* This one has been tickled !
1485 now reset him and remove him from the list.
1487 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1488 ntohs(con->dst_addr.ip.sin_port),
1489 ctdb_addr_to_str(&con->src_addr),
1490 ntohs(con->src_addr.ip.sin_port)));
1492 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1497 /* when traversing the list of all tcp connections to send tickle acks to
1498 (so that we can capture the ack coming back and kill the connection
1500 this callback is called for each connection we are currently trying to kill
/*
  Per-entry callback used when traversing the killtcp connection tree;
  data is the struct ctdb_killtcp_con for the entry.

  Once con->count has reached 5 the connection is given up on; otherwise
  another tickle is sent using the entry's dst_addr and src_addr.
  NOTE(review): the give-up action, the count increment and the name of
  the send call are not visible in this excerpt.
 */
1502 static void tickle_connection_traverse(void *param, void *data)
1504 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1506 /* have tried too many times, just give up */
1507 if (con->count >= 5) {
1512 /* otherwise, try tickling it again */
1515 (ctdb_sock_addr *)&con->dst_addr,
1516 (ctdb_sock_addr *)&con->src_addr,
1522 called every second until all sentenced connections have been reset
/*
  Timed event callback; private_data is the struct ctdb_kill_tcp.

  Runs tickle_connection_traverse over every entry in
  killtcp->connections.  If the tree pointer or its root is NULL the whole
  killtcp structure is freed; otherwise the callback re-arms itself one
  second ahead, with killtcp as the talloc context of the new timer.
  NOTE(review): presumably the function returns right after the
  talloc_free so the re-arm does not touch freed memory - the return is
  not visible here.
 */
1524 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
1525 struct timeval t, void *private_data)
1527 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1530 /* loop over all connections sending tickle ACKs */
1531 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1534 /* If there are no more connections to kill we can remove the
1535 entire killtcp structure
1537 if ( (killtcp->connections == NULL) ||
1538 (killtcp->connections->root == NULL) ) {
1539 talloc_free(killtcp);
1543 /* try tickling them again in a seconds time
1545 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1546 ctdb_tickle_sentenced_connections, killtcp);
1550 destroy the killtcp structure
/*
  talloc destructor for struct ctdb_kill_tcp (installed by
  ctdb_killtcp_add_connection).  Clears the owning vnn's killtcp pointer
  so the vnn no longer refers to the structure being freed.
  NOTE(review): the return value is not visible in this excerpt.
 */
1552 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1554 killtcp->vnn->killtcp = NULL;
1559 /* nothing fancy here, just unconditionally replace any existing
1560 connection structure with the new one.
1562 don't even free the old one if it did exist, that one is talloc_stolen
1563 by the same node in the tree anyway and will be deleted when the new data
/*
  Callback handed to trbt_insertarray32_callback by
  ctdb_killtcp_add_connection, with the new connection as its argument.
  NOTE(review): the body is not visible in this excerpt; per the comment
  preceding this function it replaces any existing entry with the new one.
 */
1566 static void *add_killtcp_callback(void *parm, void *data)
1572 add a tcp socket to the list of connections we want to RST
/*
  Queue one TCP connection (endpoints s and d) for killing.

  Steps visible in this excerpt:
   - both endpoints are canonicalised into local copies src and dst;
   - the owning vnn is looked up by dst, then by src, and finally
     ctdb->single_ip_vnn is tried if its address equals dst; if nothing
     matches an error is logged;
   - if the vnn has no killtcp state yet, one is allocated on ctdb with
     capture_fd set to -1 and an empty connection tree, linked to the vnn
     and given ctdb_killtcp_destructor;
   - a ctdb_killtcp_con is allocated on killtcp, filled with src/dst and
     inserted into the tree under killtcp_key(dst_addr, src_addr);
   - if capture_fd is still -1 a capture socket is opened on vnn->iface;
   - if no fd event exists yet, one is added for the capture socket with
     capture_tcp_handler, together with a one-second timer running
     ctdb_tickle_sentenced_connections.
  The trailing talloc_free(vnn->killtcp) belongs to a failure path.
  NOTE(review): the remaining parameters, the conditions guarding the
  lookups, the immediate tickle and all return statements are not visible
  here.  trbt_create's result is not checked in the visible lines.
 */
1574 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
1578 ctdb_sock_addr src, dst;
1579 struct ctdb_kill_tcp *killtcp;
1580 struct ctdb_killtcp_con *con;
1581 struct ctdb_vnn *vnn;
1583 ctdb_canonicalize_ip(s, &src);
1584 ctdb_canonicalize_ip(d, &dst);
1586 vnn = find_public_ip_vnn(ctdb, &dst);
1588 vnn = find_public_ip_vnn(ctdb, &src);
1591 /* if it is not a public ip it could be our 'single ip' */
1592 if (ctdb->single_ip_vnn) {
1593 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1594 vnn = ctdb->single_ip_vnn;
1599 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
1603 killtcp = vnn->killtcp;
1605 /* If this is the first connection to kill we must allocate
1608 if (killtcp == NULL) {
1609 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1610 CTDB_NO_MEMORY(ctdb, killtcp);
1613 killtcp->ctdb = ctdb;
1614 killtcp->capture_fd = -1;
1615 killtcp->connections = trbt_create(killtcp, 0);
1617 vnn->killtcp = killtcp;
1618 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1623 /* create a structure that describes this connection we want to
1624 RST and store it in killtcp->connections
1626 con = talloc(killtcp, struct ctdb_killtcp_con);
1627 CTDB_NO_MEMORY(ctdb, con);
1628 con->src_addr = src;
1629 con->dst_addr = dst;
1631 con->killtcp = killtcp;
1634 trbt_insertarray32_callback(killtcp->connections,
1635 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1636 add_killtcp_callback, con);
1639 If we dont have a socket to listen on yet we must create it
1641 if (killtcp->capture_fd == -1) {
1642 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1643 if (killtcp->capture_fd == -1) {
1644 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1650 if (killtcp->fde == NULL) {
1651 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
1652 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
1653 capture_tcp_handler, killtcp);
1655 /* We also need to set up some events to tickle all these connections
1656 until they are all reset
1658 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
1659 ctdb_tickle_sentenced_connections, killtcp);
1662 /* tickle him once now */
1671 talloc_free(vnn->killtcp);
1672 vnn->killtcp = NULL;
1677 kill a TCP connection.
/*
  Control handler: interpret indata as a struct ctdb_control_killtcp and
  pass its src_addr/dst_addr to ctdb_killtcp_add_connection, returning
  that function's result.
  NOTE(review): no check of indata.dsize against the structure size is
  visible before indata.dptr is dereferenced - confirm the caller
  validates the length.
 */
1679 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1681 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1683 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1687 called by a daemon to inform us of the entire list of TCP tickles for
1688 a particular public address.
1689 this control should only be sent by the node that is currently serving
1690 that public address.
/*
  Control handler that replaces the stored tickle list of one public
  address with the list carried in indata
  (a struct ctdb_control_tcp_tickle_list).

  indata must be long enough to reach tickles.connections, and then at
  least long enough for tickles.num connection records; either failure is
  logged.  The address is resolved with find_public_ip_vnn.  Any previous
  vnn->tcp_array is freed, a new ctdb_tcp_array is allocated (initially
  under ctdb->nodes), the connection records are copied out of indata, and
  the array is finally talloc_steal'ed onto the vnn.
  NOTE(review): the second size check uses '<', so indata carrying bytes
  beyond the declared records is accepted.  Return values are not visible
  in this excerpt.
 */
1692 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1694 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1695 struct ctdb_tcp_array *tcparray;
1696 struct ctdb_vnn *vnn;
1698 /* We must at least have tickles.num or else we cant verify the size
1699 of the received data blob
1701 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1702 tickles.connections)) {
1703 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1707 /* verify that the size of data matches what we expect */
1708 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
1709 tickles.connections)
1710 + sizeof(struct ctdb_tcp_connection)
1711 * list->tickles.num) {
1712 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1716 vnn = find_public_ip_vnn(ctdb, &list->addr);
1718 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1719 ctdb_addr_to_str(&list->addr)));
1724 /* remove any old ticklelist we might have */
1725 talloc_free(vnn->tcp_array);
1726 vnn->tcp_array = NULL;
1728 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1729 CTDB_NO_MEMORY(ctdb, tcparray);
1731 tcparray->num = list->tickles.num;
1733 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1734 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1736 memcpy(tcparray->connections, &list->tickles.connections[0],
1737 sizeof(struct ctdb_tcp_connection)*tcparray->num);
1739 /* We now have a new fresh tickle list array for this vnn */
1740 vnn->tcp_array = talloc_steal(vnn, tcparray);
1746 called to return the full list of tickles for the public address associated
1747 with the provided vnn
/*
  Control handler that returns the stored tickle list for the public
  address passed in indata (read as a ctdb_sock_addr).

  The address is resolved with find_public_ip_vnn; a miss is logged at
  DEBUG_ERR.  The reply is a struct ctdb_control_tcp_tickle_list sized for
  num connection records, allocated with talloc_size (not zeroed) using
  outdata as the talloc context, with tickles.num set and the records
  copied from vnn->tcp_array.
  NOTE(review): the handling of a NULL vnn->tcp_array, the assignment of
  the reply's addr field and the return values are not visible in this
  excerpt.
 */
1749 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1751 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1752 struct ctdb_control_tcp_tickle_list *list;
1753 struct ctdb_tcp_array *tcparray;
1755 struct ctdb_vnn *vnn;
1757 vnn = find_public_ip_vnn(ctdb, addr);
1759 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1760 ctdb_addr_to_str(addr)));
1765 tcparray = vnn->tcp_array;
1767 num = tcparray->num;
1772 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1773 tickles.connections)
1774 + sizeof(struct ctdb_tcp_connection) * num;
1776 outdata->dptr = talloc_size(outdata, outdata->dsize);
1777 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1778 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1781 list->tickles.num = num;
1783 memcpy(&list->tickles.connections[0], tcparray->connections,
1784 sizeof(struct ctdb_tcp_connection) * num);
1792 set the list of all tcp tickles for a public address
/*
  Send the tickle list in tcparray to other nodes.

  A struct ctdb_control_tcp_tickle_list sized for tcparray->num records is
  built in a temporary buffer allocated on ctdb, then sent with
  ctdb_daemon_send_control as CTDB_CONTROL_SET_TCP_TICKLE_LIST to
  CTDB_BROADCAST_CONNECTED with CTDB_CTRL_FLAG_NOREPLY.  A send failure is
  logged at DEBUG_ERR.  The temporary buffer is freed with talloc_free.
  NOTE(review): in the visible lines the control is always sent to
  CTDB_BROADCAST_CONNECTED; `timeout` and `destnode` are not referenced.
  The handling of a NULL tcparray and the copy of addr into the message
  are not visible here.
 */
1794 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
1795 struct timeval timeout, uint32_t destnode,
1796 ctdb_sock_addr *addr,
1797 struct ctdb_tcp_array *tcparray)
1801 struct ctdb_control_tcp_tickle_list *list;
1804 num = tcparray->num;
1809 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
1810 tickles.connections) +
1811 sizeof(struct ctdb_tcp_connection) * num;
1812 data.dptr = talloc_size(ctdb, data.dsize);
1813 CTDB_NO_MEMORY(ctdb, data.dptr);
1815 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1817 list->tickles.num = num;
1819 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1822 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1823 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1824 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1826 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1830 talloc_free(data.dptr);
1837 perform tickle updates if required
/*
  Timed event callback; private_data is the ctdb context.

  Walks every vnn, testing whether its pnn differs from ctdb->pnn and
  whether tcp_update_needed is clear, and calls ctdb_ctrl_set_tcp_tickles
  for the vnn's public address; a failed send is logged at DEBUG_ERR.
  The callback then re-arms itself on ctdb->tickle_update_context after
  tunable.tickle_update_interval seconds.
  NOTE(review): the bodies of the two tests (presumably `continue`) and
  the clearing of tcp_update_needed are not visible in this excerpt.
 */
1839 static void ctdb_update_tcp_tickles(struct event_context *ev,
1840 struct timed_event *te,
1841 struct timeval t, void *private_data)
1843 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1845 struct ctdb_vnn *vnn;
1847 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1848 /* we only send out updates for public addresses that
1851 if (ctdb->pnn != vnn->pnn) {
1854 /* We only send out the updates if we need to */
1855 if (!vnn->tcp_update_needed) {
1858 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
1860 CTDB_BROADCAST_CONNECTED,
1861 &vnn->public_address,
1864 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1865 ctdb_addr_to_str(&vnn->public_address)));
1869 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1870 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1871 ctdb_update_tcp_tickles, ctdb);
1876 start periodic update of tcp tickles
/*
  Create ctdb->tickle_update_context as a new talloc context under ctdb
  and schedule the first ctdb_update_tcp_tickles run on it,
  tunable.tickle_update_interval seconds from now.
  NOTE(review): the result of talloc_new is not checked in the visible
  lines.
 */
1878 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1880 ctdb->tickle_update_context = talloc_new(ctdb);
1882 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1883 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
1884 ctdb_update_tcp_tickles, ctdb);
/*
  State carried between repeated gratuitous ARP sends: the ctdb context
  and the address to announce.
  NOTE(review): send_gratious_arp also reads `iface` and `count` members;
  their declarations are not visible in this excerpt.
 */
1890 struct control_gratious_arp {
1891 struct ctdb_context *ctdb;
1892 ctdb_sock_addr addr;
1898 send a control_gratuitous arp
/*
  Timed event callback; private_data is a struct control_gratious_arp.

  Sends one ARP for arp->addr on arp->iface via ctdb_sys_send_arp and logs
  a failure together with strerror(errno).  arp->count is compared against
  CTDB_ARP_REPEAT, and the callback re-arms itself CTDB_ARP_INTERVAL
  seconds ahead with arp as the talloc context of the timer.
  NOTE(review): the count increment and what happens when the repeat
  limit is reached (presumably arp is freed and the function returns) are
  not visible in this excerpt.
 */
1900 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
1901 struct timeval t, void *private_data)
1904 struct control_gratious_arp *arp = talloc_get_type(private_data,
1905 struct control_gratious_arp);
1907 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1909 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1914 if (arp->count == CTDB_ARP_REPEAT) {
1919 event_add_timed(arp->ctdb->ev, arp,
1920 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
1921 send_gratious_arp, arp);
/*
  Control handler that starts a series of gratuitous ARPs for the address
  and interface named in indata (a struct ctdb_control_gratious_arp).

  indata must reach the iface member, and its size is then checked against
  that offset plus gratious_arp->len; both failures are logged with the
  actual and expected sizes.  A control_gratious_arp is allocated on ctdb,
  the address is copied, the interface name is duplicated onto it, and
  send_gratious_arp is scheduled with a zero timeout.
  NOTE(review): the comparison operator of the second size check, the
  initialisation of arp->ctdb and arp->count, and the return values are
  not visible here.  If the talloc_strdup fails, no free of `arp` is
  visible before CTDB_NO_MEMORY.
 */
1928 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1930 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1931 struct control_gratious_arp *arp;
1933 /* verify the size of indata */
1934 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1935 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
1936 (unsigned)indata.dsize,
1937 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1941 ( offsetof(struct ctdb_control_gratious_arp, iface)
1942 + gratious_arp->len ) ){
1944 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1945 "but should be %u bytes\n",
1946 (unsigned)indata.dsize,
1947 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
1952 arp = talloc(ctdb, struct control_gratious_arp);
1953 CTDB_NO_MEMORY(ctdb, arp);
1956 arp->addr = gratious_arp->addr;
1957 arp->iface = talloc_strdup(arp, gratious_arp->iface);
1958 CTDB_NO_MEMORY(ctdb, arp->iface);
1961 event_add_timed(arp->ctdb->ev, arp,
1962 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler that adds a public address described by indata
  (a struct ctdb_control_ip_iface).

  indata must reach the iface member, and its size is then checked against
  that offset plus pub->len; failures are logged.  On success the address,
  mask and interface name are passed to ctdb_add_public_address and its
  result is returned.
  NOTE(review): the comparison operator of the second size check and the
  error return values are not visible in this excerpt.
 */
1967 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
1969 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
1972 /* verify the size of indata */
1973 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
1974 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
1978 ( offsetof(struct ctdb_control_ip_iface, iface)
1981 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
1982 "but should be %u bytes\n",
1983 (unsigned)indata.dsize,
1984 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
1988 return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
1992 called when releaseip event finishes for del_public_address
/*
  Completion callback for the event script started by
  ctdb_control_del_public_address.  It frees private_data, which that
  caller sets to the temporary talloc context it created for the request.
  NOTE(review): `status` is not referenced in the visible lines.
 */
1994 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
1997 talloc_free(private_data);
2000 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2002 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2003 struct ctdb_vnn *vnn;
2006 /* verify the size of indata */
2007 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2008 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2012 ( offsetof(struct ctdb_control_ip_iface, iface)
2015 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2016 "but should be %u bytes\n",
2017 (unsigned)indata.dsize,
2018 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2022 /* walk over all public addresses until we find a match */
2023 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2024 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2025 TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2027 DLIST_REMOVE(ctdb->vnn, vnn);
2029 ret = ctdb_event_script_callback(ctdb,
2030 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
2031 mem_ctx, delete_ip_callback, mem_ctx,
2032 "releaseip %s %s %u",
2034 talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2035 vnn->public_netmask_bits);