4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "include/ctdb_protocol.h"
28 #include "include/ctdb_private.h"
29 #include "common/rb_tree.h"
/*
 * Timer constants for the takeover code in this file.
 * TAKEOVER_TIMEOUT() expands to an expression that reads a variable named
 * `ctdb` at the expansion site, so it can only be used where one is in scope.
 * CTDB_ARP_INTERVAL is the first argument handed to timeval_current_ofs()
 * when ctdb_control_send_arp() re-arms itself; CTDB_ARP_REPEAT is the value
 * arp->count is compared against in that same function.
 */
32 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
34 #define CTDB_ARP_INTERVAL 1
35 #define CTDB_ARP_REPEAT 3
38 struct ctdb_iface *prev, *next;
/*
 * Return the name of the interface currently bound to a vnn
 * (vnn->iface->name).
 * NOTE(review): callers in this file use it in log statements where
 * vnn->iface may be NULL (ctdb_vnn_unassign_iface guards its own access
 * with `vnn->iface ? ... : 0`), so a NULL guard presumably sits on lines
 * not shown in this listing - confirm.
 */
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
47 return vnn->iface->name;
/*
 * Register a local interface name in ctdb->ifaces.
 * The list is first scanned for an entry with the same name (strcmp).
 * A new zero-initialised struct ctdb_iface is then allocated as a talloc
 * child of ctdb, given its own copy of the name, and linked into the list
 * with DLIST_ADD.
 */
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
57 /* Check whether an interface with this name is already registered */
58 for (i=ctdb->ifaces;i;i=i->next) {
59 if (strcmp(i->name, iface) == 0) {
64 /* create a new structure for this interface */
65 i = talloc_zero(ctdb, struct ctdb_iface);
66 CTDB_NO_MEMORY_FATAL(ctdb, i);
67 i->name = talloc_strdup(i, iface);
68 CTDB_NO_MEMORY(ctdb, i->name);
71 DLIST_ADD(ctdb->ifaces, i);
/*
 * Look up an entry of ctdb->ifaces by interface name (exact strcmp match).
 */
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
81 /* Walk the registered interfaces looking for a matching name */
82 for (i=ctdb->ifaces;i;i=i->next) {
83 if (strcmp(i->name, iface) == 0) {
/*
 * Pick an interface for a vnn from its NULL-terminated vnn->ifaces name
 * array. Each name is resolved through ctdb_find_iface() and candidates are
 * compared on their `references` counter; presumably the one with the
 * fewest references wins (the branch body is not shown here - confirm).
 */
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
95 struct ctdb_iface *cur = NULL;
96 struct ctdb_iface *best = NULL;
98 for (i=0; vnn->ifaces[i]; i++) {
100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
114 if (cur->references < best->references) {
/*
 * Bind a public address (vnn) to one of its local interfaces.
 * Logs when the address is still assigned to an interface; otherwise asks
 * ctdb_vnn_best_iface() for a candidate and logs an error when none can be
 * used. On the assignment path vnn->pnn is set to this node's pnn and the
 * new binding is logged together with the interface's reference count.
 */
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124 struct ctdb_vnn *vnn)
126 struct ctdb_iface *best = NULL;
129 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130 "still assigned to iface '%s'\n",
131 ctdb_addr_to_str(&vnn->public_address),
132 ctdb_vnn_iface_string(vnn)));
136 best = ctdb_vnn_best_iface(ctdb, vnn);
138 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139 "cannot be assigned to any iface\n",
140 ctdb_addr_to_str(&vnn->public_address)));
146 vnn->pnn = ctdb->pnn;
148 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149 "now assigned to iface '%s' refs[%d]\n",
150 ctdb_addr_to_str(&vnn->public_address),
151 ctdb_vnn_iface_string(vnn),
/*
 * Drop the binding between a vnn and its interface.
 * Logs the old interface and its reference count (0 when no interface is
 * set), decrements the interface's `references` counter, and compares
 * vnn->pnn with this node's pnn (the action taken on a match is on a line
 * not shown here).
 */
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157 struct ctdb_vnn *vnn)
159 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160 "now unassigned (old iface '%s' refs[%d])\n",
161 ctdb_addr_to_str(&vnn->public_address),
162 ctdb_vnn_iface_string(vnn),
163 vnn->iface?vnn->iface->references:0));
165 vnn->iface->references--;
168 if (vnn->pnn == ctdb->pnn) {
/*
 * Report whether a vnn can currently be served.
 * First checks the interface already bound to the vnn (non-NULL and
 * link_up), then walks the vnn's NULL-terminated interface name list and
 * resolves each name through ctdb_find_iface().
 */
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174 struct ctdb_vnn *vnn)
178 if (vnn->iface && vnn->iface->link_up) {
182 for (i=0; vnn->ifaces[i]; i++) {
183 struct ctdb_iface *cur;
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/*
 * State for the repeating gratuitous-ARP / tickle-ACK timer; it is the
 * private_data of ctdb_control_send_arp(). That function also reads
 * arp->addr and arp->count, whose declarations are not shown in this listing.
 */
198 struct ctdb_takeover_arp {
199 struct ctdb_context *ctdb;
202 struct ctdb_tcp_array *tcparray;
203 struct ctdb_vnn *vnn;
208 lists of tcp endpoints
/* Doubly linked list node (prev/next) holding one ctdb_tcp_connection. */
210 struct ctdb_tcp_list {
211 struct ctdb_tcp_list *prev, *next;
212 struct ctdb_tcp_connection connection;
216 list of clients to kill on IP release
/*
 * Doubly linked list node for ctdb->client_ip_list. Entries are walked by
 * release_kill_clients() and unlinked by ctdb_client_ip_destructor(), which
 * reaches the list head through the `ctdb` back pointer.
 */
218 struct ctdb_client_ip {
219 struct ctdb_client_ip *prev, *next;
220 struct ctdb_context *ctdb;
227 send a gratuitous arp
/*
 * Timed-event callback that announces a public address.
 * private_data is a struct ctdb_takeover_arp. The callback sends a
 * gratuitous ARP for arp->addr on the vnn's current interface through
 * ctdb_sys_send_arp(), then sends a TCP tickle ACK for every connection in
 * arp->tcparray through ctdb_sys_send_tcp(). Failures of either call are
 * logged at DEBUG_CRIT. arp->count is compared with CTDB_ARP_REPEAT, and
 * the callback re-arms itself on arp->vnn->takeover_ctx with an offset of
 * timeval_current_ofs(CTDB_ARP_INTERVAL, 100000).
 */
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
230 struct timeval t, void *private_data)
232 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
233 struct ctdb_takeover_arp);
235 struct ctdb_tcp_array *tcparray;
236 const char *iface = ctdb_vnn_iface_string(arp->vnn);
238 ret = ctdb_sys_send_arp(&arp->addr, iface);
240 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241 iface, strerror(errno)));
244 tcparray = arp->tcparray;
246 for (i=0;i<tcparray->num;i++) {
247 struct ctdb_tcp_connection *tcon;
249 tcon = &tcparray->connections[i];
250 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
252 ctdb_addr_to_str(&tcon->src_addr),
253 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254 ret = ctdb_sys_send_tcp(
259 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260 ctdb_addr_to_str(&tcon->src_addr)));
267 if (arp->count == CTDB_ARP_REPEAT) {
272 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
273 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
274 ctdb_control_send_arp, arp);
/*
 * Start announcing a vnn's public address on the network.
 * Creates vnn->takeover_ctx on first use (talloc child of the vnn) and
 * allocates a zeroed struct ctdb_takeover_arp on it. The public address is
 * copied into the arp state and the vnn's tcp connection array is handed
 * over with talloc_steal(); vnn->tcp_array is cleared and
 * vnn->tcp_update_needed is set. Finally ctdb_control_send_arp() is
 * scheduled with timeval_zero() as its time.
 */
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278 struct ctdb_vnn *vnn)
280 struct ctdb_takeover_arp *arp;
281 struct ctdb_tcp_array *tcparray;
283 if (!vnn->takeover_ctx) {
284 vnn->takeover_ctx = talloc_new(vnn);
285 if (!vnn->takeover_ctx) {
290 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
296 arp->addr = vnn->public_address;
299 tcparray = vnn->tcp_array;
301 /* add all of the known tcp connections for this IP to the
302 list of tcp connections to send tickle acks for */
303 arp->tcparray = talloc_steal(arp, tcparray);
305 vnn->tcp_array = NULL;
306 vnn->tcp_update_needed = true;
309 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310 timeval_zero(), ctdb_control_send_arp, arp);
/*
 * Per-request state for the asynchronous release-IP event script: the
 * pending control `c`, a copy of the address, and the vnn. It is filled in
 * by ctdb_control_release_ip() and consumed by release_ip_callback().
 */
315 struct takeover_callback_state {
316 struct ctdb_req_control *c;
317 ctdb_sock_addr *addr;
318 struct ctdb_vnn *vnn;
/*
 * Per-request state for the asynchronous takeip event script: the pending
 * control `c` and the vnn being taken over. Consumed by
 * ctdb_do_takeip_callback().
 */
321 struct ctdb_do_takeip_state {
322 struct ctdb_req_control *c;
323 struct ctdb_vnn *vnn;
327 called when takeip event finishes
/*
 * Completion callback for the takeip event script.
 * private_data is a struct ctdb_do_takeip_state. A status of -ETIME is
 * tested separately (its handling is not shown here). On the failure path
 * the error is logged and the pending control is answered with `status`.
 * Otherwise the address is announced via ctdb_announce_vnn_iface(); the
 * control is answered with -1 if that fails and with 0 on success.
 */
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
332 struct ctdb_do_takeip_state *state =
333 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 /* the control succeeded */
356 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
362 take over an ip address
/*
 * Take over a public address on this node.
 * Assigns an interface to the vnn via ctdb_vnn_assign_iface() and logs an
 * error if that fails. It then allocates a ctdb_do_takeip_state on the
 * vnn, re-parents the request `c` onto ctdb with talloc_steal() so it
 * outlives the caller, and starts the event script with
 * ctdb_do_takeip_callback as the completion handler. The script receives
 * the interface name, the address and the netmask bits as arguments.
 */
364 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
365 struct ctdb_req_control *c,
366 struct ctdb_vnn *vnn)
369 struct ctdb_do_takeip_state *state;
371 ret = ctdb_vnn_assign_iface(ctdb, vnn);
373 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
374 "assign a usable interface\n",
375 ctdb_addr_to_str(&vnn->public_address),
376 vnn->public_netmask_bits));
380 state = talloc(vnn, struct ctdb_do_takeip_state);
381 CTDB_NO_MEMORY(ctdb, state);
383 state->c = talloc_steal(ctdb, c);
386 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
387 ctdb_addr_to_str(&vnn->public_address),
388 vnn->public_netmask_bits,
389 ctdb_vnn_iface_string(vnn)));
391 ret = ctdb_event_script_callback(ctdb,
393 ctdb_do_takeip_callback,
398 ctdb_vnn_iface_string(vnn),
399 ctdb_addr_to_str(&vnn->public_address),
400 vnn->public_netmask_bits);
403 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
404 ctdb_addr_to_str(&vnn->public_address),
405 ctdb_vnn_iface_string(vnn)));
/*
 * Per-request state for the asynchronous updateip event script: the
 * pending control `c`, the interface the address is being moved away from
 * (`old`), and the vnn. Consumed by ctdb_do_updateip_callback().
 */
413 struct ctdb_do_updateip_state {
414 struct ctdb_req_control *c;
415 struct ctdb_iface *old;
416 struct ctdb_vnn *vnn;
420 called when updateip event finishes
/*
 * Completion callback for the updateip event script.
 * private_data is a struct ctdb_do_updateip_state. A status of -ETIME is
 * tested separately (its handling is not shown here). On the failure path
 * the vnn is unassigned and re-bound to state->old, taking a reference on
 * it again, and the control is answered with `status`. Otherwise the
 * address is announced via ctdb_announce_vnn_iface(); the control is
 * answered with -1 if that fails and with 0 on success.
 */
422 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
425 struct ctdb_do_updateip_state *state =
426 talloc_get_type(private_data, struct ctdb_do_updateip_state);
430 if (status == -ETIME) {
433 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
434 ctdb_addr_to_str(&state->vnn->public_address),
436 ctdb_vnn_iface_string(state->vnn)));
439 * All we can do is reset the old interface
440 * and let the next run fix it
442 ctdb_vnn_unassign_iface(ctdb, state->vnn);
443 state->vnn->iface = state->old;
444 state->vnn->iface->references++;
446 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
451 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
453 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
458 /* the control succeeded */
459 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
465 update (move) an ip address
/*
 * Move a public address from its current interface to a better one.
 * Remembers the current interface in `old`, unassigns it and asks
 * ctdb_vnn_assign_iface() for a new binding. Both a failed assignment and
 * ending up on the same interface again are logged as errors. Otherwise a
 * ctdb_do_updateip_state is allocated on the vnn, the request `c` is
 * re-parented onto ctdb with talloc_steal(), and the CTDB_EVENT_UPDATE_IP
 * event script is started with ctdb_do_updateip_callback as the
 * completion handler.
 */
467 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
468 struct ctdb_req_control *c,
469 struct ctdb_vnn *vnn)
472 struct ctdb_do_updateip_state *state;
473 struct ctdb_iface *old = vnn->iface;
475 ctdb_vnn_unassign_iface(ctdb, vnn);
476 ret = ctdb_vnn_assign_iface(ctdb, vnn);
478 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
479 "assign a usable interface (old iface '%s')\n",
480 ctdb_addr_to_str(&vnn->public_address),
481 vnn->public_netmask_bits,
486 if (vnn->iface == old) {
487 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
488 "assign the same interface '%s'\n",
489 ctdb_addr_to_str(&vnn->public_address),
490 vnn->public_netmask_bits,
495 state = talloc(vnn, struct ctdb_do_updateip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
502 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
503 "interface %s to %s\n",
504 ctdb_addr_to_str(&vnn->public_address),
505 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_updateip_callback,
514 CTDB_EVENT_UPDATE_IP,
517 ctdb_vnn_iface_string(vnn),
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits);
521 DEBUG(DEBUG_ERR,(__location__ " Failed to update IP %s from interface %s to %s\n",
522 ctdb_addr_to_str(&vnn->public_address),
523 old->name, ctdb_vnn_iface_string(vnn)));
532 Find the vnn that holds a given public ip address
533 returns NULL if the address is not known as a public address
/*
 * Search ctdb->vnn for the entry whose public_address matches `addr`
 * according to ctdb_same_ip(). The result is a struct ctdb_vnn pointer.
 */
535 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
537 struct ctdb_vnn *vnn;
539 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
540 if (ctdb_same_ip(&vnn->public_address, addr)) {
549 take over an ip address
/*
 * Control handler: take over a public ip address on this node.
 * indata carries a struct ctdb_public_ip. The request is rejected with an
 * error log when pip->pnn is not this node. The address is looked up with
 * find_public_ip_vnn(), the kernel is asked whether the ip is already
 * configured (ctdb_sys_have_ip), and ctdb_vnn_best_iface() proposes an
 * interface. Inconsistent states are logged at DEBUG_CRIT: the ip is known
 * to the kernel but no interface is assigned, or the ip is held here while
 * it is assigned to another node. Depending on the current interface's
 * link state and reference count the handler dispatches to
 * ctdb_do_takeip() or ctdb_do_updateip(), or logs a redundant takeover.
 * The final comment in the body says the reply is sent asynchronously.
 */
551 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
552 struct ctdb_req_control *c,
557 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
558 struct ctdb_vnn *vnn;
559 bool have_ip = false;
560 bool do_updateip = false;
561 bool do_takeip = false;
562 struct ctdb_iface *best_iface = NULL;
564 if (pip->pnn != ctdb->pnn) {
565 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
566 "with pnn %d, but we're node %d\n",
567 ctdb_addr_to_str(&pip->addr),
568 pip->pnn, ctdb->pnn));
572 /* update our vnn list */
573 vnn = find_public_ip_vnn(ctdb, &pip->addr);
575 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
576 ctdb_addr_to_str(&pip->addr)));
580 have_ip = ctdb_sys_have_ip(&pip->addr);
581 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
582 if (best_iface == NULL) {
583 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find "
584 "a usable interface (old %s, have_ip %d)\n",
585 ctdb_addr_to_str(&vnn->public_address),
586 vnn->public_netmask_bits,
587 ctdb_vnn_iface_string(vnn),
592 if (vnn->iface == NULL && have_ip) {
593 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
594 "but we have no interface assigned, has someone manually configured it?"
596 ctdb_addr_to_str(&vnn->public_address)));
601 if (vnn->pnn != ctdb->pnn && have_ip) {
602 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
603 "and we have it on iface[%s], but it was assigned to node %d "
604 "and we are node %d, banning ourself\n",
605 ctdb_addr_to_str(&vnn->public_address),
606 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
612 if (vnn->iface->link_up) {
613 /* only move when the rebalance gains something */
614 if (vnn->iface->references > (best_iface->references + 1)) {
617 } else if (vnn->iface != best_iface) {
624 ctdb_vnn_unassign_iface(ctdb, vnn);
631 ret = ctdb_do_takeip(ctdb, c, vnn);
635 } else if (do_updateip) {
636 ret = ctdb_do_updateip(ctdb, c, vnn);
642 * The interface is up and the kernel known the ip
645 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
646 ctdb_addr_to_str(&pip->addr),
647 vnn->public_netmask_bits,
648 ctdb_vnn_iface_string(vnn)));
652 /* tell ctdb_control.c that we will be replying asynchronously */
659 takeover an ip address old v4 style
/*
 * Old-style (IPv4-only) takeover control.
 * Allocates a zeroed struct ctdb_public_ip as a talloc child of the request
 * `c`, copies the incoming payload over it, and forwards to
 * ctdb_control_takeover_ip() with dsize set to the new structure's size.
 * NOTE(review): memcpy() copies indata.dsize bytes into a buffer of
 * sizeof(struct ctdb_public_ip); nothing visible in this block bounds
 * indata.dsize, so this relies on the caller having validated the payload
 * size - confirm.
 */
661 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
662 struct ctdb_req_control *c,
668 data.dsize = sizeof(struct ctdb_public_ip);
669 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
670 CTDB_NO_MEMORY(ctdb, data.dptr);
672 memcpy(data.dptr, indata.dptr, indata.dsize);
673 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
677 kill any clients that are registered with an IP that is being released
/*
 * Kill every client process registered against an address being released.
 * Walks ctdb->client_ip_list; for each entry whose address matches `addr`
 * (ctdb_same_ip) the owning client is looked up with ctdb_reqid_find() and,
 * when its pid is non-zero, sent SIGKILL.
 * NOTE(review): the result of ctdb_reqid_find() is dereferenced
 * (client->pid) with no NULL check visible in this listing - confirm that
 * the lookup cannot fail here.
 */
679 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
681 struct ctdb_client_ip *ip;
683 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
684 ctdb_addr_to_str(addr)));
686 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
687 ctdb_sock_addr tmp_addr;
690 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
692 ctdb_addr_to_str(&ip->addr)));
694 if (ctdb_same_ip(&tmp_addr, addr)) {
695 struct ctdb_client *client = ctdb_reqid_find(ctdb,
698 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
700 ctdb_addr_to_str(&ip->addr),
703 if (client->pid != 0) {
704 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
705 (unsigned)client->pid,
706 ctdb_addr_to_str(addr),
708 kill(client->pid, SIGKILL);
715 called when releaseip event finishes
/*
 * Completion callback for the releaseip event script.
 * private_data is a struct takeover_callback_state. A status of -ETIME is
 * tested separately (its handling is not shown here). The callback sends
 * the released address, as a NUL-terminated string, to this node's clients
 * on CTDB_SRVID_RELEASE_IP, kills clients registered with the address,
 * unassigns the vnn's interface and answers the pending control with 0.
 */
717 static void release_ip_callback(struct ctdb_context *ctdb, int status,
720 struct takeover_callback_state *state =
721 talloc_get_type(private_data, struct takeover_callback_state);
724 if (status == -ETIME) {
728 /* send a message to all clients of this node telling them
729 that the cluster has been reconfigured and they should
730 release any sockets on this IP */
731 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
732 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
733 data.dsize = strlen((char *)data.dptr)+1;
735 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
737 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
739 /* kill clients that have registered with this IP */
740 release_kill_clients(ctdb, state->addr);
742 ctdb_vnn_unassign_iface(ctdb, state->vnn);
744 /* the control succeeded */
745 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
750 release an ip address
/*
 * Control handler: release a public ip address held by this node.
 * indata carries a struct ctdb_public_ip. The address is looked up with
 * find_public_ip_vnn(), and any pending ARP announcements are cancelled by
 * freeing vnn->takeover_ctx. If the kernel does not hold the ip, the
 * release is logged as redundant and the interface is simply unassigned.
 * A held ip with no assigned interface is logged at DEBUG_CRIT. Otherwise a
 * takeover_callback_state is allocated on ctdb, takes ownership of the
 * request `c` and a copy of the address, and the CTDB_EVENT_RELEASE_IP
 * event script is started with release_ip_callback as the completion
 * handler. The final comment in the body says the reply is sent
 * asynchronously.
 */
752 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
753 struct ctdb_req_control *c,
758 struct takeover_callback_state *state;
759 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
760 struct ctdb_vnn *vnn;
762 /* update our vnn list */
763 vnn = find_public_ip_vnn(ctdb, &pip->addr);
765 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
766 ctdb_addr_to_str(&pip->addr)));
771 /* stop any previous arps */
772 talloc_free(vnn->takeover_ctx);
773 vnn->takeover_ctx = NULL;
775 if (!ctdb_sys_have_ip(&pip->addr)) {
776 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
777 ctdb_addr_to_str(&pip->addr),
778 vnn->public_netmask_bits,
779 ctdb_vnn_iface_string(vnn)));
780 ctdb_vnn_unassign_iface(ctdb, vnn);
784 if (vnn->iface == NULL) {
785 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
786 "but we have no interface assigned, has someone manually configured it?"
788 ctdb_addr_to_str(&vnn->public_address)));
793 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
794 ctdb_addr_to_str(&pip->addr),
795 vnn->public_netmask_bits,
796 ctdb_vnn_iface_string(vnn),
799 state = talloc(ctdb, struct takeover_callback_state);
800 CTDB_NO_MEMORY(ctdb, state);
802 state->c = talloc_steal(state, c);
803 state->addr = talloc(state, ctdb_sock_addr);
804 CTDB_NO_MEMORY(ctdb, state->addr);
805 *state->addr = pip->addr;
808 ret = ctdb_event_script_callback(ctdb,
809 state, release_ip_callback, state,
811 CTDB_EVENT_RELEASE_IP,
813 ctdb_vnn_iface_string(vnn),
814 ctdb_addr_to_str(&pip->addr),
815 vnn->public_netmask_bits);
817 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
818 ctdb_addr_to_str(&pip->addr),
819 ctdb_vnn_iface_string(vnn)));
824 /* tell the control that we will be reply asynchronously */
830 release an ip address old v4 style
/*
 * Old-style (IPv4-only) release control.
 * Allocates a zeroed struct ctdb_public_ip as a talloc child of the request
 * `c`, copies the incoming payload over it, and forwards to
 * ctdb_control_release_ip() with dsize set to the new structure's size.
 * NOTE(review): memcpy() copies indata.dsize bytes into a buffer of
 * sizeof(struct ctdb_public_ip); nothing visible in this block bounds
 * indata.dsize, so this relies on the caller having validated the payload
 * size - confirm.
 */
832 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
833 struct ctdb_req_control *c,
839 data.dsize = sizeof(struct ctdb_public_ip);
840 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
841 CTDB_NO_MEMORY(ctdb, data.dptr);
843 memcpy(data.dptr, indata.dptr, indata.dsize);
844 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
 * Add one public address, with its comma-separated interface list, to
 * ctdb->vnn.
 * The existing list is checked for a duplicate address first
 * (ctdb_same_sockaddr). A new zeroed vnn is allocated on ctdb; `ifaces` is
 * duplicated and split on ',' with strtok(), each name is copied into the
 * NULL-terminated vnn->ifaces array, and every name is registered through
 * ctdb_add_local_iface(). The vnn is finally linked in with DLIST_ADD.
 * The initial talloc_array() result is checked like every other
 * allocation here: vnn->ifaces[num] is written after the loop even when
 * the loop body (and its realloc) never ran.
 */
848 static int ctdb_add_public_address(struct ctdb_context *ctdb,
849 ctdb_sock_addr *addr,
850 unsigned mask, const char *ifaces)
852 struct ctdb_vnn *vnn;
859 /* Verify that we dont have an entry for this ip yet */
860 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
861 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
862 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
863 ctdb_addr_to_str(addr)));
868 /* create a new vnn structure for this ip address */
869 vnn = talloc_zero(ctdb, struct ctdb_vnn);
870 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
871 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
872 tmp = talloc_strdup(vnn, ifaces);
873 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
874 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
875 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
876 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
877 vnn->ifaces[num] = talloc_strdup(vnn, iface);
878 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
882 vnn->ifaces[num] = NULL;
883 vnn->public_address = *addr;
884 vnn->public_netmask_bits = mask;
887 for (i=0; vnn->ifaces[i]; i++) {
888 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
890 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
891 "for public_address[%s]\n",
892 vnn->ifaces[i], ctdb_addr_to_str(addr)));
898 DLIST_ADD(ctdb->vnn, vnn);
904 setup the event script directory
/*
 * Store a talloc copy of `script_dir` (owned by ctdb) in
 * ctdb->event_script_dir; allocation failure is handled by CTDB_NO_MEMORY.
 */
906 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
908 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
909 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
914 setup the public address lists from a file
/*
 * Load the public address list from the file `alist`.
 * The file is read with file_lines_load(); a load failure is recorded via
 * ctdb_set_error(). Trailing empty lines are examined first, then each
 * line has leading spaces/tabs skipped, is tested for being empty, and is
 * split on spaces/tabs with strtok(). When no interface is given the
 * default comes from ctdb->default_public_interface; having neither is
 * logged at DEBUG_CRIT. The address/mask is parsed with parse_ip_mask()
 * and the entry is added through ctdb_add_public_address(); failures of
 * either are logged with the 1-based line number.
 */
916 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
922 lines = file_lines_load(alist, &nlines, ctdb);
924 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
927 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
931 for (i=0;i<nlines;i++) {
939 while ((*line == ' ') || (*line == '\t')) {
945 if (strcmp(line, "") == 0) {
948 tok = strtok(line, " \t");
950 tok = strtok(NULL, " \t");
952 if (NULL == ctdb->default_public_interface) {
953 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
958 ifaces = ctdb->default_public_interface;
963 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
964 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
968 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
969 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
 * Configure a single public ip on one interface.
 * Allocates a zeroed vnn on ctdb with a two-slot interface array
 * { iface copy, NULL }, parses `ip` into svnn->public_address with
 * parse_ip(), registers the interface through ctdb_add_local_iface()
 * (failure is logged at DEBUG_CRIT), assigns it via
 * ctdb_vnn_assign_iface(), and stores the vnn in ctdb->single_ip_vnn.
 */
979 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
983 struct ctdb_vnn *svnn;
987 svnn = talloc_zero(ctdb, struct ctdb_vnn);
988 CTDB_NO_MEMORY(ctdb, svnn);
990 svnn->ifaces = talloc_array(svnn, const char *, 2);
991 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
992 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
993 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
994 svnn->ifaces[1] = NULL;
996 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1002 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1004 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1005 "for single_ip[%s]\n",
1007 ctdb_addr_to_str(&svnn->public_address)));
1012 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1018 ctdb->single_ip_vnn = svnn;
/*
 * Singly linked list node describing one public address in the merged,
 * cluster-wide list built by create_merged_ip_list(). Code in this file
 * also uses a `pnn` member (-1 is compared against as a special value);
 * its declaration is not shown in this listing.
 */
1022 struct ctdb_public_ip_list {
1023 struct ctdb_public_ip_list *next;
1025 ctdb_sock_addr addr;
1029 /* Given a physical node, return the number of
1030 public addresses that are currently assigned to this node.
/*
 * Walk the `ips` list and test each entry's pnn against `pnn`; used by the
 * callers in this file as the number of addresses a node currently serves.
 */
1032 static int node_ip_coverage(struct ctdb_context *ctdb,
1034 struct ctdb_public_ip_list *ips)
1038 for (;ips;ips=ips->next) {
1039 if (ips->pnn == pnn) {
1047 /* Check if this is a public ip known to the node, i.e. can that
1048 node takeover this ip ?
/*
 * Check whether node `pnn` lists `ip` among its available public ips
 * (ctdb->nodes[pnn]->available_public_ips, compared with ctdb_same_ip()).
 * A node with no list is handled separately.
 * Callers in this file treat a non-zero return as "cannot serve"
 * (e.g. `can_node_serve_ip(...) != 0` followed by the comment "this node
 * can not serve this ip"), so the return value is not a boolean "can".
 */
1050 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1051 struct ctdb_public_ip_list *ip)
1053 struct ctdb_all_public_ips *public_ips;
1056 public_ips = ctdb->nodes[pnn]->available_public_ips;
1058 if (public_ips == NULL) {
1062 for (i=0;i<public_ips->num;i++) {
1063 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1064 /* yes, this node can serve this public ip */
1073 /* search the node list for a node to take over this ip.
1074 pick the node that is currently serving the fewest ips
1075 so that the ips get spread out evenly.
/*
 * Choose a node to take over `ip`.
 * Iterates over nodemap, skipping nodes whose flags intersect `mask` and
 * nodes for which can_node_serve_ip() returns non-zero. The current load
 * of each remaining node is obtained from node_ip_coverage() over
 * `all_ips`. A warning is logged when no node can take the address.
 */
1077 static int find_takeover_node(struct ctdb_context *ctdb,
1078 struct ctdb_node_map *nodemap, uint32_t mask,
1079 struct ctdb_public_ip_list *ip,
1080 struct ctdb_public_ip_list *all_ips)
1082 int pnn, min=0, num;
1086 for (i=0;i<nodemap->num;i++) {
1087 if (nodemap->nodes[i].flags & mask) {
1088 /* This node is not healty and can not be used to serve
1094 /* verify that this node can serve this ip */
1095 if (can_node_serve_ip(ctdb, i, ip)) {
1096 /* no it couldnt so skip to the next node */
1100 num = node_ip_coverage(ctdb, i, all_ips);
1101 /* was this the first node we checked ? */
1113 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1114 ctdb_addr_to_str(&ip->addr)));
/*
 * Build the IP_KEYLEN-word tree key for an address.
 * The key lives in a function-local static array that is zeroed on every
 * call, so the result is overwritten by the next call and the function is
 * not reentrant. The branch reading ip->ip fills only key[3]; the branch
 * reading ip->ip6 fills key[0..3]. An unknown address family is logged.
 */
1124 static uint32_t *ip_key(ctdb_sock_addr *ip)
1126 static uint32_t key[IP_KEYLEN];
1128 bzero(key, sizeof(key));
1130 switch (ip->sa.sa_family) {
1132 key[3] = htonl(ip->ip.sin_addr.s_addr);
1135 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1136 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1137 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1138 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1141 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/*
 * NOTE(review): only the signature is visible in this listing. Presumably
 * this is the callback handed to trbt_insertarray32_callback() in
 * create_merged_ip_list() - confirm against the full file.
 */
1148 static void *add_ip_callback(void *parm, void *data)
/*
 * Tree traversal callback used by create_merged_ip_list().
 * `param` is the address of the list head and `data` is one
 * ctdb_public_ip_list node; the node's next pointer is set to the current
 * head so the node can be linked in front of the list.
 */
1153 void getips_count_callback(void *param, void *data)
1155 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1156 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1158 new_ip->next = *ip_list;
/*
 * Build the cluster-wide list of public addresses.
 * Any previous ctdb->ip_tree is freed and a new tree is created on ctdb.
 * For every node (nodes flagged NODE_FLAGS_DELETED and nodes without a
 * known_public_ips list are tested for and handled separately) each known
 * public ip is copied into a ctdb_public_ip_list entry allocated on the
 * tree and inserted under the key produced by ip_key(). The tree is then
 * traversed with getips_count_callback() to chain the entries into a
 * list. The entries are owned by ctdb->ip_tree, so they live until the
 * tree is freed.
 */
1162 static struct ctdb_public_ip_list *
1163 create_merged_ip_list(struct ctdb_context *ctdb)
1166 struct ctdb_public_ip_list *ip_list;
1167 struct ctdb_all_public_ips *public_ips;
1169 if (ctdb->ip_tree != NULL) {
1170 talloc_free(ctdb->ip_tree);
1171 ctdb->ip_tree = NULL;
1173 ctdb->ip_tree = trbt_create(ctdb, 0);
1175 for (i=0;i<ctdb->num_nodes;i++) {
1176 public_ips = ctdb->nodes[i]->known_public_ips;
1178 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1182 /* there were no public ips for this node */
1183 if (public_ips == NULL) {
1187 for (j=0;j<public_ips->num;j++) {
1188 struct ctdb_public_ip_list *tmp_ip;
1190 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1191 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1192 tmp_ip->pnn = public_ips->ips[j].pnn;
1193 tmp_ip->addr = public_ips->ips[j].addr;
1194 tmp_ip->next = NULL;
1196 trbt_insertarray32_callback(ctdb->ip_tree,
1197 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1204 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1210 make any IP alias changes for public addresses that are necessary
/*
 * Recompute which node serves each public address and push the result to
 * the cluster.
 *
 * Steps visible in this block:
 *  - choose the node flag mask: INACTIVE|DISABLED when at least one node
 *    has neither flag, otherwise INACTIVE only;
 *  - build the merged address list with create_merged_ip_list();
 *  - with the deterministic_public_ips tunable set to 1, reset every
 *    entry to `i % nodemap->num`;
 *  - examine entries whose node is masked or cannot serve the address
 *    (can_node_serve_ip() != 0), then call find_takeover_node() for every
 *    entry with pnn == -1;
 *  - check the no_ip_failback tunable (combining it with
 *    deterministic_public_ips is logged as an error), then compare the
 *    node_ip_coverage() of the nodes able to serve each address and, when
 *    maxnum > minnum+1, look for an entry owned by maxnode;
 *  - send CTDB_CONTROL_RELEASE_IP / RELEASE_IPv4 to every node not flagged
 *    NODE_FLAGS_DISCONNECTED for addresses it should not hold, and wait;
 *  - send CTDB_CONTROL_TAKEOVER_IP / TAKEOVER_IPv4 to the owner of every
 *    assigned address, and wait.
 * Each control gets a TAKEOVER_TIMEOUT() timer. tmp_ctx is freed on the
 * error paths shown and at the end.
 *
 * NOTE(review): `i%nodemap->num` would divide by zero for an empty node
 * map; no guard is visible in this listing - confirm callers never pass
 * one.
 */
1212 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1214 int i, num_healthy, retries;
1215 struct ctdb_public_ip ip;
1216 struct ctdb_public_ipv4 ipv4;
1218 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1219 int maxnode, maxnum=0, minnode, minnum=0, num;
1221 struct client_async_data *async_data;
1222 struct ctdb_client_control_state *state;
1223 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1228 /* Count how many completely healthy nodes we have */
1230 for (i=0;i<nodemap->num;i++) {
1231 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1236 if (num_healthy > 0) {
1237 /* We have healthy nodes, so only consider them for
1238 serving public addresses
1240 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1242 /* We didnt have any completely healthy nodes so
1243 use "disabled" nodes as a fallback
1245 mask = NODE_FLAGS_INACTIVE;
1248 /* since nodes only know about those public addresses that
1249 can be served by that particular node, no single node has
1250 a full list of all public addresses that exist in the cluster.
1251 Walk over all node structures and create a merged list of
1252 all public addresses that exist in the cluster.
1254 keep the tree of ips around as ctdb->ip_tree
1256 all_ips = create_merged_ip_list(ctdb);
1258 /* If we want deterministic ip allocations, i.e. that the ip addresses
1259 will always be allocated the same way for a specific set of
1260 available/unavailable nodes.
1262 if (1 == ctdb->tunable.deterministic_public_ips) {
1263 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1264 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1265 tmp_ip->pnn = i%nodemap->num;
1270 /* mark all public addresses with a masked node as being served by
1273 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1274 if (tmp_ip->pnn == -1) {
1277 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1282 /* verify that the assigned nodes can serve that public ip
1283 and set it to -1 if not
1285 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1286 if (tmp_ip->pnn == -1) {
1289 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1290 /* this node can not serve this ip. */
1296 /* now we must redistribute all public addresses with takeover node
1297 -1 among the nodes available
1301 /* loop over all ip's and find a physical node to cover for
1304 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1305 if (tmp_ip->pnn == -1) {
1306 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1307 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1308 ctdb_addr_to_str(&tmp_ip->addr)));
1313 /* If we dont want ips to fail back after a node becomes healthy
1314 again, we wont even try to reallocat the ip addresses so that
1315 they are evenly spread out.
1316 This can NOT be used at the same time as DeterministicIPs !
1318 if (1 == ctdb->tunable.no_ip_failback) {
1319 if (1 == ctdb->tunable.deterministic_public_ips) {
1320 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1326 /* now, try to make sure the ip adresses are evenly distributed
1328 for each ip address, loop over all nodes that can serve this
1329 ip and make sure that the difference between the node
1330 serving the most and the node serving the least ip's are not greater
1333 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1334 if (tmp_ip->pnn == -1) {
1338 /* Get the highest and lowest number of ips's served by any
1339 valid node which can serve this ip.
1343 for (i=0;i<nodemap->num;i++) {
1344 if (nodemap->nodes[i].flags & mask) {
1348 /* only check nodes that can actually serve this ip */
1349 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1350 /* no it couldnt so skip to the next node */
1354 num = node_ip_coverage(ctdb, i, all_ips);
1355 if (maxnode == -1) {
1364 if (minnode == -1) {
1374 if (maxnode == -1) {
1375 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1376 ctdb_addr_to_str(&tmp_ip->addr)));
1381 /* If we want deterministic IPs then dont try to reallocate
1382 them to spread out the load.
1384 if (1 == ctdb->tunable.deterministic_public_ips) {
1388 /* if the spread between the smallest and largest coverage by
1389 a node is >=2 we steal one of the ips from the node with
1390 most coverage to even things out a bit.
1391 try to do this at most 5 times since we dont want to spend
1392 too much time balancing the ip coverage.
1394 if ( (maxnum > minnum+1)
1396 struct ctdb_public_ip_list *tmp;
1398 /* mark one of maxnode's vnn's as unassigned and try
1401 for (tmp=all_ips;tmp;tmp=tmp->next) {
1402 if (tmp->pnn == maxnode) {
1412 /* finished distributing the public addresses, now just send the
1413 info out to the nodes
1417 /* at this point ->pnn is the node which will own each IP
1418 or -1 if there is no node that can cover this ip
1421 /* now tell all nodes to delete any alias that they should not
1422 have. This will be a NOOP on nodes that don't currently
1423 hold the given alias */
1424 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1425 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1427 for (i=0;i<nodemap->num;i++) {
1428 /* don't talk to unconnected nodes, but do talk to banned nodes */
1429 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1433 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1434 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1435 /* This node should be serving this
1436 vnn so dont tell it to release the ip
1440 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1441 ipv4.pnn = tmp_ip->pnn;
1442 ipv4.sin = tmp_ip->addr.ip;
1445 data.dsize = sizeof(ipv4);
1446 data.dptr = (uint8_t *)&ipv4;
1447 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1448 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1451 if (state != NULL) {
1452 event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
1455 ip.pnn = tmp_ip->pnn;
1456 ip.addr = tmp_ip->addr;
1458 data.dsize = sizeof(ip);
1459 data.dptr = (uint8_t *)&ip;
1460 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1461 0, CTDB_CONTROL_RELEASE_IP, 0,
1464 if (state != NULL) {
1465 event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
1469 if (state == NULL) {
1470 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1471 talloc_free(tmp_ctx);
1475 ctdb_client_async_add(async_data, state);
1478 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1479 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1480 talloc_free(tmp_ctx);
1483 talloc_free(async_data);
1486 /* tell all nodes to get their own IPs */
1487 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1490 if (tmp_ip->pnn == -1) {
1491 /* this IP won't be taken over */
1495 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1496 ipv4.pnn = tmp_ip->pnn;
1497 ipv4.sin = tmp_ip->addr.ip;
1499 data.dsize = sizeof(ipv4);
1500 data.dptr = (uint8_t *)&ipv4;
1501 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1502 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1505 if (state != NULL) {
1506 event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
1509 ip.pnn = tmp_ip->pnn;
1510 ip.addr = tmp_ip->addr;
1512 data.dsize = sizeof(ip);
1513 data.dptr = (uint8_t *)&ip;
1514 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1515 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1518 if (state != NULL) {
1519 event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
1522 if (state == NULL) {
1523 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1524 talloc_free(tmp_ctx);
1528 ctdb_client_async_add(async_data, state);
1530 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1531 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1532 talloc_free(tmp_ctx);
1536 talloc_free(tmp_ctx);
1542 destroy a ctdb_client_ip structure
1544 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1546 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1547 ctdb_addr_to_str(&ip->addr),
1548 ntohs(ip->addr.ip.sin_port),
1551 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1556 called by a client to inform us of a TCP connection that it is managing
1557 that should be tickled with an ACK when IP takeover is done
1558 we handle both the old ipv4 style of packets as well as the new ipv4/6
1561 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1564 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1565 struct ctdb_control_tcp *old_addr = NULL;
1566 struct ctdb_control_tcp_addr new_addr;
1567 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1568 struct ctdb_tcp_list *tcp;
1569 struct ctdb_control_tcp_vnn t;
1572 struct ctdb_client_ip *ip;
1573 struct ctdb_vnn *vnn;
1574 ctdb_sock_addr addr;
1576 switch (indata.dsize) {
1577 case sizeof(struct ctdb_control_tcp):
1578 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1579 ZERO_STRUCT(new_addr);
1580 tcp_sock = &new_addr;
1581 tcp_sock->src.ip = old_addr->src;
1582 tcp_sock->dest.ip = old_addr->dest;
1584 case sizeof(struct ctdb_control_tcp_addr):
1585 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1588 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1589 "to ctdb_control_tcp_client. size was %d but "
1590 "only allowed sizes are %lu and %lu\n",
1592 (long unsigned)sizeof(struct ctdb_control_tcp),
1593 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1597 addr = tcp_sock->src;
1598 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1599 addr = tcp_sock->dest;
1600 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1603 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1604 vnn = find_public_ip_vnn(ctdb, &addr);
1606 switch (addr.sa.sa_family) {
1608 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1609 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1610 ctdb_addr_to_str(&addr)));
1614 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1615 ctdb_addr_to_str(&addr)));
1618 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1624 if (vnn->pnn != ctdb->pnn) {
1625 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1626 ctdb_addr_to_str(&addr),
1627 client_id, client->pid));
1628 /* failing this call will tell smbd to die */
1632 ip = talloc(client, struct ctdb_client_ip);
1633 CTDB_NO_MEMORY(ctdb, ip);
1637 ip->client_id = client_id;
1638 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1639 DLIST_ADD(ctdb->client_ip_list, ip);
1641 tcp = talloc(client, struct ctdb_tcp_list);
1642 CTDB_NO_MEMORY(ctdb, tcp);
1644 tcp->connection.src_addr = tcp_sock->src;
1645 tcp->connection.dst_addr = tcp_sock->dest;
1647 DLIST_ADD(client->tcp_list, tcp);
1649 t.src = tcp_sock->src;
1650 t.dest = tcp_sock->dest;
1652 data.dptr = (uint8_t *)&t;
1653 data.dsize = sizeof(t);
1655 switch (addr.sa.sa_family) {
1657 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1658 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1659 ctdb_addr_to_str(&tcp_sock->src),
1660 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1663 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1664 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1665 ctdb_addr_to_str(&tcp_sock->src),
1666 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1669 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1673 /* tell all nodes about this tcp connection */
1674 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1675 CTDB_CONTROL_TCP_ADD,
1676 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1678 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1686 find a tcp address on a list
1688 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1689 struct ctdb_tcp_connection *tcp)
1693 if (array == NULL) {
1697 for (i=0;i<array->num;i++) {
1698 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1699 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1700 return &array->connections[i];
1707 called by a daemon to inform us of a TCP connection that one of its
1708 clients managing that should tickled with an ACK when IP takeover is
1711 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1713 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1714 struct ctdb_tcp_array *tcparray;
1715 struct ctdb_tcp_connection tcp;
1716 struct ctdb_vnn *vnn;
1718 vnn = find_public_ip_vnn(ctdb, &p->dest);
1720 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1721 ctdb_addr_to_str(&p->dest)));
1727 tcparray = vnn->tcp_array;
1729 /* If this is the first tickle */
1730 if (tcparray == NULL) {
1731 tcparray = talloc_size(ctdb->nodes,
1732 offsetof(struct ctdb_tcp_array, connections) +
1733 sizeof(struct ctdb_tcp_connection) * 1);
1734 CTDB_NO_MEMORY(ctdb, tcparray);
1735 vnn->tcp_array = tcparray;
1738 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1739 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1741 tcparray->connections[tcparray->num].src_addr = p->src;
1742 tcparray->connections[tcparray->num].dst_addr = p->dest;
1748 /* Do we already have this tickle ?*/
1749 tcp.src_addr = p->src;
1750 tcp.dst_addr = p->dest;
1751 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1752 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1753 ctdb_addr_to_str(&tcp.dst_addr),
1754 ntohs(tcp.dst_addr.ip.sin_port),
1759 /* A new tickle, we must add it to the array */
1760 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1761 struct ctdb_tcp_connection,
1763 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1765 vnn->tcp_array = tcparray;
1766 tcparray->connections[tcparray->num].src_addr = p->src;
1767 tcparray->connections[tcparray->num].dst_addr = p->dest;
1770 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1771 ctdb_addr_to_str(&tcp.dst_addr),
1772 ntohs(tcp.dst_addr.ip.sin_port),
1780 called by a daemon to inform us of a TCP connection that one of its
1781 clients is managing and that should be tickled with an ACK when IP takeover is
1784 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1786 struct ctdb_tcp_connection *tcpp;
1787 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1790 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1791 ctdb_addr_to_str(&conn->dst_addr)));
1795 /* if the array is empty we cant remove it
1796 and we dont need to do anything
1798 if (vnn->tcp_array == NULL) {
1799 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1800 ctdb_addr_to_str(&conn->dst_addr),
1801 ntohs(conn->dst_addr.ip.sin_port)));
1806 /* See if we know this connection
1807 if we dont know this connection then we dont need to do anything
1809 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1811 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1812 ctdb_addr_to_str(&conn->dst_addr),
1813 ntohs(conn->dst_addr.ip.sin_port)));
1818 /* We need to remove this entry from the array.
1819 Instead of allocating a new array and copying data to it
1820 we cheat and just copy the last entry in the existing array
1821 to the entry that is to be removed and just shring the
1824 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1825 vnn->tcp_array->num--;
1827 /* If we deleted the last entry we also need to remove the entire array
1829 if (vnn->tcp_array->num == 0) {
1830 talloc_free(vnn->tcp_array);
1831 vnn->tcp_array = NULL;
1834 vnn->tcp_update_needed = true;
1836 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1837 ctdb_addr_to_str(&conn->src_addr),
1838 ntohs(conn->src_addr.ip.sin_port)));
1843 called when a daemon restarts - send all tickles for all public addresses
1844 we are serving immediately to the new node.
1846 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1848 /*XXX here we should send all tickes we are serving to the new node */
1854 called when a client structure goes away - hook to remove
1855 elements from the tcp_list in all daemons
1857 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1859 while (client->tcp_list) {
1860 struct ctdb_tcp_list *tcp = client->tcp_list;
1861 DLIST_REMOVE(client->tcp_list, tcp);
1862 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1868 release all IPs on shutdown
1870 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1872 struct ctdb_vnn *vnn;
1874 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1875 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1876 ctdb_vnn_unassign_iface(ctdb, vnn);
1882 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1883 ctdb_vnn_iface_string(vnn),
1884 ctdb_addr_to_str(&vnn->public_address),
1885 vnn->public_netmask_bits);
1886 release_kill_clients(ctdb, &vnn->public_address);
1887 ctdb_vnn_unassign_iface(ctdb, vnn);
1893 get list of public IPs
1895 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1896 struct ctdb_req_control *c, TDB_DATA *outdata)
1899 struct ctdb_all_public_ips *ips;
1900 struct ctdb_vnn *vnn;
1901 bool only_available = false;
1903 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1904 only_available = true;
1907 /* count how many public ip structures we have */
1909 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1913 len = offsetof(struct ctdb_all_public_ips, ips) +
1914 num*sizeof(struct ctdb_public_ip);
1915 ips = talloc_zero_size(outdata, len);
1916 CTDB_NO_MEMORY(ctdb, ips);
1919 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1920 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1923 ips->ips[i].pnn = vnn->pnn;
1924 ips->ips[i].addr = vnn->public_address;
1928 len = offsetof(struct ctdb_all_public_ips, ips) +
1929 i*sizeof(struct ctdb_public_ip);
1931 outdata->dsize = len;
1932 outdata->dptr = (uint8_t *)ips;
1939 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1941 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1942 struct ctdb_req_control *c, TDB_DATA *outdata)
1945 struct ctdb_all_public_ipsv4 *ips;
1946 struct ctdb_vnn *vnn;
1948 /* count how many public ip structures we have */
1950 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1951 if (vnn->public_address.sa.sa_family != AF_INET) {
1957 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1958 num*sizeof(struct ctdb_public_ipv4);
1959 ips = talloc_zero_size(outdata, len);
1960 CTDB_NO_MEMORY(ctdb, ips);
1962 outdata->dsize = len;
1963 outdata->dptr = (uint8_t *)ips;
1967 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1968 if (vnn->public_address.sa.sa_family != AF_INET) {
1971 ips->ips[i].pnn = vnn->pnn;
1972 ips->ips[i].sin = vnn->public_address.ip;
1979 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1980 struct ctdb_req_control *c,
1985 ctdb_sock_addr *addr;
1986 struct ctdb_control_public_ip_info *info;
1987 struct ctdb_vnn *vnn;
1989 addr = (ctdb_sock_addr *)indata.dptr;
1991 vnn = find_public_ip_vnn(ctdb, addr);
1993 /* if it is not a public ip it could be our 'single ip' */
1994 if (ctdb->single_ip_vnn) {
1995 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1996 vnn = ctdb->single_ip_vnn;
2001 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2002 "'%s'not a public address\n",
2003 ctdb_addr_to_str(addr)));
2007 /* count how many public ip structures we have */
2009 for (;vnn->ifaces[num];) {
2013 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2014 num*sizeof(struct ctdb_control_iface_info);
2015 info = talloc_zero_size(outdata, len);
2016 CTDB_NO_MEMORY(ctdb, info);
2018 info->ip.addr = vnn->public_address;
2019 info->ip.pnn = vnn->pnn;
2020 info->active_idx = 0xFFFFFFFF;
2022 for (i=0; vnn->ifaces[i]; i++) {
2023 struct ctdb_iface *cur;
2025 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2027 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2031 if (vnn->iface == cur) {
2032 info->active_idx = i;
2034 strcpy(info->ifaces[i].name, cur->name);
2035 info->ifaces[i].link_state = cur->link_up;
2036 info->ifaces[i].references = cur->references;
2039 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2040 i*sizeof(struct ctdb_control_iface_info);
2042 outdata->dsize = len;
2043 outdata->dptr = (uint8_t *)info;
2048 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2049 struct ctdb_req_control *c,
2053 struct ctdb_control_get_ifaces *ifaces;
2054 struct ctdb_iface *cur;
2056 /* count how many public ip structures we have */
2058 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2062 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2063 num*sizeof(struct ctdb_control_iface_info);
2064 ifaces = talloc_zero_size(outdata, len);
2065 CTDB_NO_MEMORY(ctdb, ifaces);
2068 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2069 strcpy(ifaces->ifaces[i].name, cur->name);
2070 ifaces->ifaces[i].link_state = cur->link_up;
2071 ifaces->ifaces[i].references = cur->references;
2075 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2076 i*sizeof(struct ctdb_control_iface_info);
2078 outdata->dsize = len;
2079 outdata->dptr = (uint8_t *)ifaces;
2084 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2085 struct ctdb_req_control *c,
2088 struct ctdb_control_iface_info *info;
2089 struct ctdb_iface *iface;
2090 bool link_up = false;
2092 info = (struct ctdb_control_iface_info *)indata.dptr;
2094 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2095 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2096 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2097 len, len, info->name));
2101 switch (info->link_state) {
2109 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2110 (unsigned int)info->link_state));
2114 if (info->references != 0) {
2115 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2116 (unsigned int)info->references));
2120 iface = ctdb_find_iface(ctdb, info->name);
2121 if (iface == NULL) {
2122 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2127 if (link_up == iface->link_up) {
2131 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2132 ("iface[%s] has changed it's link status %s => %s\n",
2134 iface->link_up?"up":"down",
2135 link_up?"up":"down"));
2137 iface->link_up = link_up;
2143 structure containing the listening socket and the list of tcp connections
2144 that the ctdb daemon is to kill
2146 struct ctdb_kill_tcp {
2147 struct ctdb_vnn *vnn;
2148 struct ctdb_context *ctdb;
2150 struct fd_event *fde;
2151 trbt_tree_t *connections;
2156 a tcp connection that is to be killed
2158 struct ctdb_killtcp_con {
2159 ctdb_sock_addr src_addr;
2160 ctdb_sock_addr dst_addr;
2162 struct ctdb_kill_tcp *killtcp;
2165 /* this function is used to create a key to represent this socketpair
2166 in the killtcp tree.
2167 this key is used to insert and lookup matching socketpairs that are
2168 to be tickled and RST
2170 #define KILLTCP_KEYLEN 10
2171 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2173 static uint32_t key[KILLTCP_KEYLEN];
2175 bzero(key, sizeof(key));
2177 if (src->sa.sa_family != dst->sa.sa_family) {
2178 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2182 switch (src->sa.sa_family) {
2184 key[0] = dst->ip.sin_addr.s_addr;
2185 key[1] = src->ip.sin_addr.s_addr;
2186 key[2] = dst->ip.sin_port;
2187 key[3] = src->ip.sin_port;
2190 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2191 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2192 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2193 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2194 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2195 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2196 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2197 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2198 key[8] = dst->ip6.sin6_port;
2199 key[9] = src->ip6.sin6_port;
2202 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2210 called when we get a read event on the raw socket
2212 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2213 uint16_t flags, void *private_data)
2215 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2216 struct ctdb_killtcp_con *con;
2217 ctdb_sock_addr src, dst;
2218 uint32_t ack_seq, seq;
2220 if (!(flags & EVENT_FD_READ)) {
2224 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2225 killtcp->private_data,
2227 &ack_seq, &seq) != 0) {
2228 /* probably a non-tcp ACK packet */
2232 /* check if we have this guy in our list of connections
2235 con = trbt_lookuparray32(killtcp->connections,
2236 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2238 /* no this was some other packet we can just ignore */
2242 /* This one has been tickled !
2243 now reset him and remove him from the list.
2245 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2246 ntohs(con->dst_addr.ip.sin_port),
2247 ctdb_addr_to_str(&con->src_addr),
2248 ntohs(con->src_addr.ip.sin_port)));
2250 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2255 /* when traversing the list of all tcp connections to send tickle acks to
2256 (so that we can capture the ack coming back and kill the connection
2258 this callback is called for each connection we are currently trying to kill
2260 static void tickle_connection_traverse(void *param, void *data)
2262 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2264 /* have tried too many times, just give up */
2265 if (con->count >= 5) {
2270 /* othervise, try tickling it again */
2273 (ctdb_sock_addr *)&con->dst_addr,
2274 (ctdb_sock_addr *)&con->src_addr,
2280 called every second until all sentenced connections have been reset
2282 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2283 struct timeval t, void *private_data)
2285 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2288 /* loop over all connections sending tickle ACKs */
2289 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2292 /* If there are no more connections to kill we can remove the
2293 entire killtcp structure
2295 if ( (killtcp->connections == NULL) ||
2296 (killtcp->connections->root == NULL) ) {
2297 talloc_free(killtcp);
2301 /* try tickling them again in a seconds time
2303 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2304 ctdb_tickle_sentenced_connections, killtcp);
2308 destroy the killtcp structure
2310 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2312 killtcp->vnn->killtcp = NULL;
2317 /* nothing fancy here, just unconditionally replace any existing
2318 connection structure with the new one.
2320 dont even free the old one if it did exist, that one is talloc_stolen
2321 by the same node in the tree anyway and will be deleted when the new data
2324 static void *add_killtcp_callback(void *parm, void *data)
2330 add a tcp socket to the list of connections we want to RST
2332 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2336 ctdb_sock_addr src, dst;
2337 struct ctdb_kill_tcp *killtcp;
2338 struct ctdb_killtcp_con *con;
2339 struct ctdb_vnn *vnn;
2341 ctdb_canonicalize_ip(s, &src);
2342 ctdb_canonicalize_ip(d, &dst);
2344 vnn = find_public_ip_vnn(ctdb, &dst);
2346 vnn = find_public_ip_vnn(ctdb, &src);
2349 /* if it is not a public ip it could be our 'single ip' */
2350 if (ctdb->single_ip_vnn) {
2351 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2352 vnn = ctdb->single_ip_vnn;
2357 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2361 killtcp = vnn->killtcp;
2363 /* If this is the first connection to kill we must allocate
2366 if (killtcp == NULL) {
2367 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2368 CTDB_NO_MEMORY(ctdb, killtcp);
2371 killtcp->ctdb = ctdb;
2372 killtcp->capture_fd = -1;
2373 killtcp->connections = trbt_create(killtcp, 0);
2375 vnn->killtcp = killtcp;
2376 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2381 /* create a structure that describes this connection we want to
2382 RST and store it in killtcp->connections
2384 con = talloc(killtcp, struct ctdb_killtcp_con);
2385 CTDB_NO_MEMORY(ctdb, con);
2386 con->src_addr = src;
2387 con->dst_addr = dst;
2389 con->killtcp = killtcp;
2392 trbt_insertarray32_callback(killtcp->connections,
2393 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2394 add_killtcp_callback, con);
2397 If we dont have a socket to listen on yet we must create it
2399 if (killtcp->capture_fd == -1) {
2400 const char *iface = ctdb_vnn_iface_string(vnn);
2401 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2402 if (killtcp->capture_fd == -1) {
2403 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2404 "socket on iface '%s' for killtcp (%s)\n",
2405 iface, strerror(errno)));
2411 if (killtcp->fde == NULL) {
2412 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2413 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
2414 capture_tcp_handler, killtcp);
2416 /* We also need to set up some events to tickle all these connections
2417 until they are all reset
2419 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2420 ctdb_tickle_sentenced_connections, killtcp);
2423 /* tickle him once now */
2432 talloc_free(vnn->killtcp);
2433 vnn->killtcp = NULL;
2438 kill a TCP connection.
2440 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2442 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2444 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2448 called by a daemon to inform us of the entire list of TCP tickles for
2449 a particular public address.
2450 this control should only be sent by the node that is currently serving
2451 that public address.
2453 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2455 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2456 struct ctdb_tcp_array *tcparray;
2457 struct ctdb_vnn *vnn;
2459 /* We must at least have tickles.num or else we cant verify the size
2460 of the received data blob
2462 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2463 tickles.connections)) {
2464 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2468 /* verify that the size of data matches what we expect */
2469 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2470 tickles.connections)
2471 + sizeof(struct ctdb_tcp_connection)
2472 * list->tickles.num) {
2473 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2477 vnn = find_public_ip_vnn(ctdb, &list->addr);
2479 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2480 ctdb_addr_to_str(&list->addr)));
2485 /* remove any old ticklelist we might have */
2486 talloc_free(vnn->tcp_array);
2487 vnn->tcp_array = NULL;
2489 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2490 CTDB_NO_MEMORY(ctdb, tcparray);
2492 tcparray->num = list->tickles.num;
2494 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2495 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2497 memcpy(tcparray->connections, &list->tickles.connections[0],
2498 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2500 /* We now have a new fresh tickle list array for this vnn */
2501 vnn->tcp_array = talloc_steal(vnn, tcparray);
2507 called to return the full list of tickles for the public address associated
2508 with the provided vnn
2510 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2512 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2513 struct ctdb_control_tcp_tickle_list *list;
2514 struct ctdb_tcp_array *tcparray;
2516 struct ctdb_vnn *vnn;
2518 vnn = find_public_ip_vnn(ctdb, addr);
2520 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2521 ctdb_addr_to_str(addr)));
2526 tcparray = vnn->tcp_array;
2528 num = tcparray->num;
2533 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2534 tickles.connections)
2535 + sizeof(struct ctdb_tcp_connection) * num;
2537 outdata->dptr = talloc_size(outdata, outdata->dsize);
2538 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2539 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2542 list->tickles.num = num;
2544 memcpy(&list->tickles.connections[0], tcparray->connections,
2545 sizeof(struct ctdb_tcp_connection) * num);
2553 set the list of all tcp tickles for a public address
2555 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2556 struct timeval timeout, uint32_t destnode,
2557 ctdb_sock_addr *addr,
2558 struct ctdb_tcp_array *tcparray)
2562 struct ctdb_control_tcp_tickle_list *list;
2565 num = tcparray->num;
2570 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2571 tickles.connections) +
2572 sizeof(struct ctdb_tcp_connection) * num;
2573 data.dptr = talloc_size(ctdb, data.dsize);
2574 CTDB_NO_MEMORY(ctdb, data.dptr);
2576 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2578 list->tickles.num = num;
2580 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2583 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2584 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2585 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2587 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2591 talloc_free(data.dptr);
2598 perform tickle updates if required
2600 static void ctdb_update_tcp_tickles(struct event_context *ev,
2601 struct timed_event *te,
2602 struct timeval t, void *private_data)
2604 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2606 struct ctdb_vnn *vnn;
2608 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2609 /* we only send out updates for public addresses that
2612 if (ctdb->pnn != vnn->pnn) {
2615 /* We only send out the updates if we need to */
2616 if (!vnn->tcp_update_needed) {
2619 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2621 CTDB_BROADCAST_CONNECTED,
2622 &vnn->public_address,
2625 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2626 ctdb_addr_to_str(&vnn->public_address)));
2630 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2631 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2632 ctdb_update_tcp_tickles, ctdb);
2637 start periodic update of tcp tickles
2639 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2641 ctdb->tickle_update_context = talloc_new(ctdb);
2643 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2644 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2645 ctdb_update_tcp_tickles, ctdb);
2651 struct control_gratious_arp {
2652 struct ctdb_context *ctdb;
2653 ctdb_sock_addr addr;
2659 send a control_gratuitous arp
2661 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2662 struct timeval t, void *private_data)
2665 struct control_gratious_arp *arp = talloc_get_type(private_data,
2666 struct control_gratious_arp);
2668 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2670 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2671 arp->iface, strerror(errno)));
2676 if (arp->count == CTDB_ARP_REPEAT) {
2681 event_add_timed(arp->ctdb->ev, arp,
2682 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2683 send_gratious_arp, arp);
2690 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2692 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2693 struct control_gratious_arp *arp;
2695 /* verify the size of indata */
2696 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2697 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2698 (unsigned)indata.dsize,
2699 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2703 ( offsetof(struct ctdb_control_gratious_arp, iface)
2704 + gratious_arp->len ) ){
2706 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2707 "but should be %u bytes\n",
2708 (unsigned)indata.dsize,
2709 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2714 arp = talloc(ctdb, struct control_gratious_arp);
2715 CTDB_NO_MEMORY(ctdb, arp);
2718 arp->addr = gratious_arp->addr;
2719 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2720 CTDB_NO_MEMORY(ctdb, arp->iface);
2723 event_add_timed(arp->ctdb->ev, arp,
2724 timeval_zero(), send_gratious_arp, arp);
2729 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2731 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2734 /* verify the size of indata */
2735 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2736 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2740 ( offsetof(struct ctdb_control_ip_iface, iface)
2743 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2744 "but should be %u bytes\n",
2745 (unsigned)indata.dsize,
2746 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2750 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2753 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2761 called when releaseip event finishes for del_public_address
2763 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2766 talloc_free(private_data);
2769 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2771 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2772 struct ctdb_vnn *vnn;
2775 /* verify the size of indata */
2776 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2777 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2781 ( offsetof(struct ctdb_control_ip_iface, iface)
2784 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2785 "but should be %u bytes\n",
2786 (unsigned)indata.dsize,
2787 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2791 /* walk over all public addresses until we find a match */
2792 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2793 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2794 TALLOC_CTX *mem_ctx;
2796 DLIST_REMOVE(ctdb->vnn, vnn);
2797 if (vnn->iface == NULL) {
2802 mem_ctx = talloc_new(ctdb);
2803 ret = ctdb_event_script_callback(ctdb,
2804 mem_ctx, delete_ip_callback, mem_ctx,
2806 CTDB_EVENT_RELEASE_IP,
2808 ctdb_vnn_iface_string(vnn),
2809 ctdb_addr_to_str(&vnn->public_address),
2810 vnn->public_netmask_bits);
2811 ctdb_vnn_unassign_iface(ctdb, vnn);
2823 /* This function is called from the recovery daemon to verify that a remote
2824 node has the expected ip allocation.
2825 This is verified against ctdb->ip_tree
2827 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2829 struct ctdb_public_ip_list *tmp_ip;
2832 if (ctdb->ip_tree == NULL) {
2833 /* dont know the expected allocation yet, assume remote node
2842 for (i=0; i<ips->num; i++) {
2843 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2844 if (tmp_ip == NULL) {
2845 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2849 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2853 if (tmp_ip->pnn != ips->ips[i].pnn) {
2854 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2862 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2864 struct ctdb_public_ip_list *tmp_ip;
2866 if (ctdb->ip_tree == NULL) {
2867 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2871 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2872 if (tmp_ip == NULL) {
2873 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2877 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2878 tmp_ip->pnn = ip->pnn;