4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn once) and then walking
118 * ctdb->ifaces once and deleting those not in the tree. Let's go to
119 * one of those if the naive implementation causes problems... :-)
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122 struct ctdb_vnn *vnn)
124 struct ctdb_interface *i, *next;
126 /* For each interface, check if there's an IP using it. */
127 for (i = ctdb->ifaces; i != NULL; i = next) {
132 /* Only consider interfaces named in the given VNN. */
133 if (!vnn_has_interface_with_name(vnn, i->name)) {
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
158 struct ctdb_interface *i;
160 for (i=ctdb->ifaces;i;i=i->next) {
161 if (strcmp(i->name, iface) == 0) {
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170 struct ctdb_vnn *vnn)
173 struct ctdb_interface *cur = NULL;
174 struct ctdb_interface *best = NULL;
176 for (i=0; vnn->ifaces[i]; i++) {
178 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
192 if (cur->references < best->references) {
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202 struct ctdb_vnn *vnn)
204 struct ctdb_interface *best = NULL;
207 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208 "still assigned to iface '%s'\n",
209 ctdb_addr_to_str(&vnn->public_address),
210 ctdb_vnn_iface_string(vnn)));
214 best = ctdb_vnn_best_iface(ctdb, vnn);
216 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217 "cannot assign to iface any iface\n",
218 ctdb_addr_to_str(&vnn->public_address)));
224 vnn->pnn = ctdb->pnn;
226 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227 "now assigned to iface '%s' refs[%d]\n",
228 ctdb_addr_to_str(&vnn->public_address),
229 ctdb_vnn_iface_string(vnn),
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238 "now unassigned (old iface '%s' refs[%d])\n",
239 ctdb_addr_to_str(&vnn->public_address),
240 ctdb_vnn_iface_string(vnn),
241 vnn->iface?vnn->iface->references:0));
243 vnn->iface->references--;
246 if (vnn->pnn == ctdb->pnn) {
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252 struct ctdb_vnn *vnn)
256 /* Nodes that are not RUNNING can not host IPs */
257 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 if (vnn->delete_pending) {
265 if (vnn->iface && vnn->iface->link_up) {
269 for (i=0; vnn->ifaces[i]; i++) {
270 struct ctdb_interface *cur;
272 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
285 struct ctdb_takeover_arp {
286 struct ctdb_context *ctdb;
289 struct ctdb_tcp_array *tcparray;
290 struct ctdb_vnn *vnn;
295 lists of tcp endpoints
297 struct ctdb_tcp_list {
298 struct ctdb_tcp_list *prev, *next;
299 struct ctdb_connection connection;
303 list of clients to kill on IP release
305 struct ctdb_client_ip {
306 struct ctdb_client_ip *prev, *next;
307 struct ctdb_context *ctdb;
314 send a gratuitous arp
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317 struct tevent_timer *te,
318 struct timeval t, void *private_data)
320 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
321 struct ctdb_takeover_arp);
323 struct ctdb_tcp_array *tcparray;
324 const char *iface = ctdb_vnn_iface_string(arp->vnn);
326 ret = ctdb_sys_send_arp(&arp->addr, iface);
328 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329 iface, strerror(errno)));
332 tcparray = arp->tcparray;
334 for (i=0;i<tcparray->num;i++) {
335 struct ctdb_connection *tcon;
337 tcon = &tcparray->connections[i];
338 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339 (unsigned)ntohs(tcon->dst.ip.sin_port),
340 ctdb_addr_to_str(&tcon->src),
341 (unsigned)ntohs(tcon->src.ip.sin_port)));
342 ret = ctdb_sys_send_tcp(
347 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348 ctdb_addr_to_str(&tcon->src)));
355 if (arp->count == CTDB_ARP_REPEAT) {
360 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362 ctdb_control_send_arp, arp);
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366 struct ctdb_vnn *vnn)
368 struct ctdb_takeover_arp *arp;
369 struct ctdb_tcp_array *tcparray;
371 if (!vnn->takeover_ctx) {
372 vnn->takeover_ctx = talloc_new(vnn);
373 if (!vnn->takeover_ctx) {
378 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
384 arp->addr = vnn->public_address;
387 tcparray = vnn->tcp_array;
389 /* add all of the known tcp connections for this IP to the
390 list of tcp connections to send tickle acks for */
391 arp->tcparray = talloc_steal(arp, tcparray);
393 vnn->tcp_array = NULL;
394 vnn->tcp_update_needed = true;
397 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398 timeval_zero(), ctdb_control_send_arp, arp);
403 struct takeover_callback_state {
404 struct ctdb_req_control_old *c;
405 ctdb_sock_addr *addr;
406 struct ctdb_vnn *vnn;
409 struct ctdb_do_takeip_state {
410 struct ctdb_req_control_old *c;
411 struct ctdb_vnn *vnn;
415 called when takeip event finishes
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
420 struct ctdb_do_takeip_state *state =
421 talloc_get_type(private_data, struct ctdb_do_takeip_state);
426 if (status == -ETIME) {
429 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430 ctdb_addr_to_str(&state->vnn->public_address),
431 ctdb_vnn_iface_string(state->vnn)));
432 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
438 if (ctdb->do_checkpublicip) {
440 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
442 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450 data.dsize = strlen((char *)data.dptr) + 1;
451 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
453 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
456 /* the control succeeded */
457 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
464 state->vnn->update_in_flight = false;
469 take over an ip address
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472 struct ctdb_req_control_old *c,
473 struct ctdb_vnn *vnn)
476 struct ctdb_do_takeip_state *state;
478 if (vnn->update_in_flight) {
479 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480 "update for this IP already in flight\n",
481 ctdb_addr_to_str(&vnn->public_address),
482 vnn->public_netmask_bits));
486 ret = ctdb_vnn_assign_iface(ctdb, vnn);
488 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489 "assign a usable interface\n",
490 ctdb_addr_to_str(&vnn->public_address),
491 vnn->public_netmask_bits));
495 state = talloc(vnn, struct ctdb_do_takeip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
501 vnn->update_in_flight = true;
502 talloc_set_destructor(state, ctdb_takeip_destructor);
504 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505 ctdb_addr_to_str(&vnn->public_address),
506 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_takeip_callback,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 ctdb_vnn_iface_string(vnn)));
530 struct ctdb_do_updateip_state {
531 struct ctdb_req_control_old *c;
532 struct ctdb_interface *old;
533 struct ctdb_vnn *vnn;
537 called when updateip event finishes
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
542 struct ctdb_do_updateip_state *state =
543 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547 if (status == -ETIME) {
550 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551 ctdb_addr_to_str(&state->vnn->public_address),
553 ctdb_vnn_iface_string(state->vnn)));
556 * All we can do is reset the old interface
557 * and let the next run fix it
559 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560 state->vnn->iface = state->old;
561 state->vnn->iface->references++;
563 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
568 if (ctdb->do_checkpublicip) {
570 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
572 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
579 /* the control succeeded */
580 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
587 state->vnn->update_in_flight = false;
592 update (move) an ip address
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595 struct ctdb_req_control_old *c,
596 struct ctdb_vnn *vnn)
599 struct ctdb_do_updateip_state *state;
600 struct ctdb_interface *old = vnn->iface;
601 const char *new_name;
603 if (vnn->update_in_flight) {
604 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605 "update for this IP already in flight\n",
606 ctdb_addr_to_str(&vnn->public_address),
607 vnn->public_netmask_bits));
611 ctdb_vnn_unassign_iface(ctdb, vnn);
612 ret = ctdb_vnn_assign_iface(ctdb, vnn);
614 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615 "assin a usable interface (old iface '%s')\n",
616 ctdb_addr_to_str(&vnn->public_address),
617 vnn->public_netmask_bits,
622 new_name = ctdb_vnn_iface_string(vnn);
623 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624 /* A benign update from one interface onto itself.
625 * no need to run the eventscripts in this case, just return
628 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632 state = talloc(vnn, struct ctdb_do_updateip_state);
633 CTDB_NO_MEMORY(ctdb, state);
635 state->c = talloc_steal(ctdb, c);
639 vnn->update_in_flight = true;
640 talloc_set_destructor(state, ctdb_updateip_destructor);
642 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643 "interface %s to %s\n",
644 ctdb_addr_to_str(&vnn->public_address),
645 vnn->public_netmask_bits,
649 ret = ctdb_event_script_callback(ctdb,
651 ctdb_do_updateip_callback,
653 CTDB_EVENT_UPDATE_IP,
657 ctdb_addr_to_str(&vnn->public_address),
658 vnn->public_netmask_bits);
660 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661 ctdb_addr_to_str(&vnn->public_address),
662 old->name, new_name));
671 Find the vnn of the node that has a public ip address
672 returns -1 if the address is not known as a public address
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
676 struct ctdb_vnn *vnn;
678 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679 if (ctdb_same_ip(&vnn->public_address, addr)) {
688 take over an ip address
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691 struct ctdb_req_control_old *c,
696 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697 struct ctdb_vnn *vnn;
698 bool have_ip = false;
699 bool do_updateip = false;
700 bool do_takeip = false;
701 struct ctdb_interface *best_iface = NULL;
703 if (pip->pnn != ctdb->pnn) {
704 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705 "with pnn %d, but we're node %d\n",
706 ctdb_addr_to_str(&pip->addr),
707 pip->pnn, ctdb->pnn));
711 /* update our vnn list */
712 vnn = find_public_ip_vnn(ctdb, &pip->addr);
714 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715 ctdb_addr_to_str(&pip->addr)));
719 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720 have_ip = ctdb_sys_have_ip(&pip->addr);
722 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723 if (best_iface == NULL) {
724 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725 "a usable interface (old %s, have_ip %d)\n",
726 ctdb_addr_to_str(&vnn->public_address),
727 vnn->public_netmask_bits,
728 ctdb_vnn_iface_string(vnn),
733 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
739 if (vnn->iface == NULL && have_ip) {
740 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742 ctdb_addr_to_str(&vnn->public_address)));
746 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748 "and we have it on iface[%s], but it was assigned to node %d"
749 "and we are node %d, banning ourself\n",
750 ctdb_addr_to_str(&vnn->public_address),
751 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
756 if (vnn->pnn == -1 && have_ip) {
757 vnn->pnn = ctdb->pnn;
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we already have it on iface[%s], update local daemon\n",
760 ctdb_addr_to_str(&vnn->public_address),
761 ctdb_vnn_iface_string(vnn)));
766 if (vnn->iface != best_iface) {
767 if (!vnn->iface->link_up) {
769 } else if (vnn->iface->references > (best_iface->references + 1)) {
770 /* only move when the rebalance gains something */
778 ctdb_vnn_unassign_iface(ctdb, vnn);
785 ret = ctdb_do_takeip(ctdb, c, vnn);
789 } else if (do_updateip) {
790 ret = ctdb_do_updateip(ctdb, c, vnn);
796 * The interface is up and the kernel knows the ip
799 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800 ctdb_addr_to_str(&pip->addr),
801 vnn->public_netmask_bits,
802 ctdb_vnn_iface_string(vnn)));
806 /* tell ctdb_control.c that we will be replying asynchronously */
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
814 DLIST_REMOVE(ctdb->vnn, vnn);
815 ctdb_vnn_unassign_iface(ctdb, vnn);
816 ctdb_remove_orphaned_ifaces(ctdb, vnn);
821 called when releaseip event finishes
823 static void release_ip_callback(struct ctdb_context *ctdb, int status,
826 struct takeover_callback_state *state =
827 talloc_get_type(private_data, struct takeover_callback_state);
830 if (status == -ETIME) {
834 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835 if (ctdb_sys_have_ip(state->addr)) {
837 ("IP %s still hosted during release IP callback, failing\n",
838 ctdb_addr_to_str(state->addr)));
839 ctdb_request_control_reply(ctdb, state->c,
846 /* send a message to all clients of this node telling them
847 that the cluster has been reconfigured and they should
848 release any sockets on this IP */
849 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851 data.dsize = strlen((char *)data.dptr)+1;
853 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
855 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
857 ctdb_vnn_unassign_iface(ctdb, state->vnn);
859 /* Process the IP if it has been marked for deletion */
860 if (state->vnn->delete_pending) {
861 do_delete_ip(ctdb, state->vnn);
865 /* the control succeeded */
866 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
872 if (state->vnn != NULL) {
873 state->vnn->update_in_flight = false;
879 release an ip address
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
882 struct ctdb_req_control_old *c,
887 struct takeover_callback_state *state;
888 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889 struct ctdb_vnn *vnn;
892 /* update our vnn list */
893 vnn = find_public_ip_vnn(ctdb, &pip->addr);
895 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896 ctdb_addr_to_str(&pip->addr)));
901 /* stop any previous arps */
902 talloc_free(vnn->takeover_ctx);
903 vnn->takeover_ctx = NULL;
905 /* Some ctdb tool commands (e.g. moveip) send
906 * lazy multicast to drop an IP from any node that isn't the
907 * intended new node. The following makes ctdbd ignore
908 * a release for any address it doesn't host.
910 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911 if (!ctdb_sys_have_ip(&pip->addr)) {
912 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913 ctdb_addr_to_str(&pip->addr),
914 vnn->public_netmask_bits,
915 ctdb_vnn_iface_string(vnn)));
916 ctdb_vnn_unassign_iface(ctdb, vnn);
920 if (vnn->iface == NULL) {
921 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922 ctdb_addr_to_str(&pip->addr),
923 vnn->public_netmask_bits));
928 /* There is a potential race between take_ip and us because we
929 * update the VNN via a callback that runs when the
930 * eventscripts have been run. Avoid the race by allowing one
931 * update to be in flight at a time.
933 if (vnn->update_in_flight) {
934 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935 "update for this IP already in flight\n",
936 ctdb_addr_to_str(&vnn->public_address),
937 vnn->public_netmask_bits));
941 iface = strdup(ctdb_vnn_iface_string(vnn));
943 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
944 ctdb_addr_to_str(&pip->addr),
945 vnn->public_netmask_bits,
949 state = talloc(ctdb, struct takeover_callback_state);
951 ctdb_set_error(ctdb, "Out of memory at %s:%d",
957 state->c = talloc_steal(state, c);
958 state->addr = talloc(state, ctdb_sock_addr);
959 if (state->addr == NULL) {
960 ctdb_set_error(ctdb, "Out of memory at %s:%d",
966 *state->addr = pip->addr;
969 vnn->update_in_flight = true;
970 talloc_set_destructor(state, ctdb_releaseip_destructor);
972 ret = ctdb_event_script_callback(ctdb,
973 state, release_ip_callback, state,
974 CTDB_EVENT_RELEASE_IP,
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits);
981 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982 ctdb_addr_to_str(&pip->addr),
983 ctdb_vnn_iface_string(vnn)));
988 /* tell the control that we will be replying asynchronously */
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994 ctdb_sock_addr *addr,
995 unsigned mask, const char *ifaces,
998 struct ctdb_vnn *vnn;
1005 tmp = strdup(ifaces);
1006 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007 if (!ctdb_sys_check_iface_exists(iface)) {
1008 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1015 /* Verify that we don't have an entry for this ip yet */
1016 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1019 ctdb_addr_to_str(addr)));
1024 /* create a new vnn structure for this ip address */
1025 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028 tmp = talloc_strdup(vnn, ifaces);
1029 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038 vnn->ifaces[num] = NULL;
1039 vnn->public_address = *addr;
1040 vnn->public_netmask_bits = mask;
1042 if (check_address) {
1043 if (ctdb_sys_have_ip(addr)) {
1044 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045 vnn->pnn = ctdb->pnn;
1049 for (i=0; vnn->ifaces[i]; i++) {
1050 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1052 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053 "for public_address[%s]\n",
1054 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1060 DLIST_ADD(ctdb->vnn, vnn);
1066 setup the public address lists from a file
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1074 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075 if (lines == NULL) {
1076 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1079 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083 for (i=0;i<nlines;i++) {
1085 ctdb_sock_addr addr;
1086 const char *addrstr;
1091 while ((*line == ' ') || (*line == '\t')) {
1097 if (strcmp(line, "") == 0) {
1100 tok = strtok(line, " \t");
1102 tok = strtok(NULL, " \t");
1104 if (NULL == ctdb->default_public_interface) {
1105 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1110 ifaces = ctdb->default_public_interface;
1115 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1132 static void *add_ip_callback(void *parm, void *data)
1134 struct public_ip_list *this_ip = parm;
1135 struct public_ip_list *prev_ip = data;
1137 if (prev_ip == NULL) {
1140 if (this_ip->pnn == -1) {
1141 this_ip->pnn = prev_ip->pnn;
1147 static int getips_count_callback(void *param, void *data)
1149 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1152 new_ip->next = *ip_list;
1157 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1158 struct ctdb_public_ip_list *ips,
1161 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1162 struct ipalloc_state *ipalloc_state,
1163 struct ctdb_node_map_old *nodemap)
1167 struct ctdb_public_ip_list_old *ip_list;
1169 if (ipalloc_state->num != nodemap->num) {
1172 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1173 ipalloc_state->num, nodemap->num));
1177 for (j=0; j<nodemap->num; j++) {
1178 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1182 /* Retrieve the list of known public IPs from the node */
1183 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1186 ipalloc_state->known_public_ips,
1191 ("Failed to read known public IPs from node: %u\n",
1195 ipalloc_state->known_public_ips[j].num = ip_list->num;
1196 /* This could be copied and freed. However, ip_list
1197 * is allocated off ipalloc_state->known_public_ips,
1198 * so this is a safe hack. This will go away in a
1199 * while anyway... */
1200 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1202 if (ctdb->do_checkpublicip) {
1203 verify_remote_ip_allocation(
1205 &ipalloc_state->known_public_ips[j],
1209 /* Retrieve the list of available public IPs from the node */
1210 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1213 ipalloc_state->available_public_ips,
1214 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1218 ("Failed to read available public IPs from node: %u\n",
1222 ipalloc_state->available_public_ips[j].num = ip_list->num;
1223 /* This could be copied and freed. However, ip_list
1224 * is allocated off ipalloc_state->available_public_ips,
1225 * so this is a safe hack. This will go away in a
1226 * while anyway... */
1227 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1233 static struct public_ip_list *
1234 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1237 struct public_ip_list *ip_list;
1238 struct ctdb_public_ip_list *public_ips;
1240 TALLOC_FREE(ctdb->ip_tree);
1241 ctdb->ip_tree = trbt_create(ctdb, 0);
1243 if (ipalloc_state->known_public_ips == NULL) {
1244 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1248 for (i=0; i < ipalloc_state->num; i++) {
1250 public_ips = &ipalloc_state->known_public_ips[i];
1252 for (j=0; j < public_ips->num; j++) {
1253 struct public_ip_list *tmp_ip;
1255 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1256 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1257 /* Do not use information about IP addresses hosted
1258 * on other nodes, it may not be accurate */
1259 if (public_ips->ip[j].pnn == i) {
1260 tmp_ip->pnn = public_ips->ip[j].pnn;
1264 tmp_ip->addr = public_ips->ip[j].addr;
1265 tmp_ip->next = NULL;
1267 trbt_insertarray32_callback(ctdb->ip_tree,
1268 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1275 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1280 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1284 for (i=0;i<nodemap->num;i++) {
1285 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1286 /* Found one completely healthy node */
1294 struct get_tunable_callback_data {
1295 const char *tunable;
1300 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1301 int32_t res, TDB_DATA outdata,
1304 struct get_tunable_callback_data *cd =
1305 (struct get_tunable_callback_data *)callback;
1309 /* Already handled in fail callback */
1313 if (outdata.dsize != sizeof(uint32_t)) {
1314 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1315 cd->tunable, pnn, (int)sizeof(uint32_t),
1316 (int)outdata.dsize));
1321 size = talloc_array_length(cd->out);
1323 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1324 cd->tunable, pnn, size));
1329 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1332 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1333 int32_t res, TDB_DATA outdata,
1336 struct get_tunable_callback_data *cd =
1337 (struct get_tunable_callback_data *)callback;
1342 ("Timed out getting tunable \"%s\" from node %d\n",
1348 DEBUG(DEBUG_WARNING,
1349 ("Tunable \"%s\" not implemented on node %d\n",
1354 ("Unexpected error getting tunable \"%s\" from node %d\n",
1360 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1361 TALLOC_CTX *tmp_ctx,
1362 struct ctdb_node_map_old *nodemap,
1363 const char *tunable,
1364 uint32_t default_value)
1367 struct ctdb_control_get_tunable *t;
1370 struct get_tunable_callback_data callback_data;
1373 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1374 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1375 for (i=0; i<nodemap->num; i++) {
1376 tvals[i] = default_value;
1379 callback_data.out = tvals;
1380 callback_data.tunable = tunable;
1381 callback_data.fatal = false;
1383 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1384 data.dptr = talloc_size(tmp_ctx, data.dsize);
1385 t = (struct ctdb_control_get_tunable *)data.dptr;
1386 t->length = strlen(tunable)+1;
1387 memcpy(t->name, tunable, t->length);
1388 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1389 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1390 nodes, 0, TAKEOVER_TIMEOUT(),
1392 get_tunable_callback,
1393 get_tunable_fail_callback,
1394 &callback_data) != 0) {
1395 if (callback_data.fatal) {
1401 talloc_free(data.dptr);
1406 /* Set internal flags for IP allocation:
1408 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1409 * Set NOIPHOST ip flag for each INACTIVE node
1410 * if all nodes are disabled:
1411 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1413 * Set NOIPHOST ip flags for disabled nodes
1415 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1416 struct ctdb_node_map_old *nodemap,
1417 uint32_t *tval_noiptakeover,
1418 uint32_t *tval_noiphostonalldisabled)
1422 for (i=0;i<nodemap->num;i++) {
1423 /* Can not take IPs on node with NoIPTakeover set */
1424 if (tval_noiptakeover[i] != 0) {
1425 ipalloc_state->noiptakeover[i] = true;
1428 /* Can not host IPs on INACTIVE node */
1429 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1430 ipalloc_state->noiphost[i] = true;
1434 if (all_nodes_are_disabled(nodemap)) {
1435 /* If all nodes are disabled, can not host IPs on node
1436 * with NoIPHostOnAllDisabled set
1438 for (i=0;i<nodemap->num;i++) {
1439 if (tval_noiphostonalldisabled[i] != 0) {
1440 ipalloc_state->noiphost[i] = true;
1444 /* If some nodes are not disabled, then can not host
1445 * IPs on DISABLED node
1447 for (i=0;i<nodemap->num;i++) {
1448 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1449 ipalloc_state->noiphost[i] = true;
/*
 * Fetch the per-node tunable arrays needed for IP allocation and fold
 * them into ipalloc_state via set_ipflags_internal().
 *
 * Both arrays come from get_tunable_from_nodes(), which is handed
 * ipalloc_state as its second argument.  A NULL result from either
 * call takes the failure branch.  On the success path both arrays are
 * released with talloc_free() once the flags have been derived.
 */
1455 static bool set_ipflags(struct ctdb_context *ctdb,
1456 struct ipalloc_state *ipalloc_state,
1457 struct ctdb_node_map_old *nodemap)
1459 uint32_t *tval_noiptakeover;
1460 uint32_t *tval_noiphostonalldisabled;
1462 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1464 if (tval_noiptakeover == NULL) {
1468 tval_noiphostonalldisabled =
1469 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1470 "NoIPHostOnAllDisabled", 0);
1471 if (tval_noiphostonalldisabled == NULL) {
1472 /* Caller frees tmp_ctx */
1476 set_ipflags_internal(ipalloc_state, nodemap,
1478 tval_noiphostonalldisabled);
1480 talloc_free(tval_noiptakeover);
1481 talloc_free(tval_noiphostonalldisabled);
/*
 * Allocate an ipalloc_state on mem_ctx and initialise it from ctdb.
 *
 * num is taken from ctdb->num_nodes and sizes the four per-node arrays
 * (known_public_ips, available_public_ips, noiptakeover, noiphost),
 * each zero-allocated as a talloc child of the new state.
 *
 * The algorithm comes from the tunables: lcp2_public_ip_assignment == 1
 * selects IPALLOC_LCP2, else deterministic_public_ips == 1 selects
 * IPALLOC_DETERMINISTIC; IPALLOC_NONDETERMINISTIC is the remaining
 * choice.  no_ip_failback is copied from the tunable of the same name.
 *
 * Returns the new state.  Every allocation failure logs
 * "Out of memory".
 */
1486 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1487 TALLOC_CTX *mem_ctx)
1489 struct ipalloc_state *ipalloc_state =
1490 talloc_zero(mem_ctx, struct ipalloc_state);
1491 if (ipalloc_state == NULL) {
1492 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1496 ipalloc_state->num = ctdb->num_nodes;
1498 ipalloc_state->known_public_ips =
1499 talloc_zero_array(ipalloc_state,
1500 struct ctdb_public_ip_list,
1501 ipalloc_state->num);
1502 if (ipalloc_state->known_public_ips == NULL) {
1503 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1507 ipalloc_state->available_public_ips =
1508 talloc_zero_array(ipalloc_state,
1509 struct ctdb_public_ip_list,
1510 ipalloc_state->num);
1511 if (ipalloc_state->available_public_ips == NULL) {
1512 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1515 ipalloc_state->noiptakeover =
1516 talloc_zero_array(ipalloc_state,
1518 ipalloc_state->num);
1519 if (ipalloc_state->noiptakeover == NULL) {
1520 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1523 ipalloc_state->noiphost =
1524 talloc_zero_array(ipalloc_state,
1526 ipalloc_state->num);
1527 if (ipalloc_state->noiphost == NULL) {
1528 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1532 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1533 ipalloc_state->algorithm = IPALLOC_LCP2;
1534 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1535 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1537 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1540 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1542 return ipalloc_state;
1544 talloc_free(ipalloc_state);
/* Failure bookkeeping shared by the async phases of a takeover run. */
1548 struct takeover_callback_data {
/* One failure counter per node, indexed by PNN. */
1550 unsigned int *fail_count;
1553 static struct takeover_callback_data *
1554 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1557 static struct takeover_callback_data *takeover_data;
1559 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1560 if (takeover_data == NULL) {
1561 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1565 takeover_data->fail_count = talloc_zero_array(takeover_data,
1566 unsigned int, num_nodes);
1567 if (takeover_data->fail_count == NULL) {
1568 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1569 talloc_free(takeover_data);
1573 takeover_data->num_nodes = num_nodes;
1575 return takeover_data;
/*
 * Failure callback installed on the async controls of a takeover run.
 *
 * callback_data must be a struct takeover_callback_data;
 * talloc_get_type_abort() enforces this.  A node_pnn that is not below
 * cd->num_nodes is reported as invalid.  A node's first failure is
 * logged, and its fail_count entry is incremented.
 */
1578 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1579 uint32_t node_pnn, int32_t res,
1580 TDB_DATA outdata, void *callback_data)
1582 struct takeover_callback_data *cd =
1583 talloc_get_type_abort(callback_data,
1584 struct takeover_callback_data);
1586 if (node_pnn >= cd->num_nodes) {
1587 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1591 if (cd->fail_count[node_pnn] == 0) {
1593 ("Node %u failed the takeover run\n", node_pnn));
1596 cd->fail_count[node_pnn]++;
/*
 * Act on the failures recorded in tcd during a takeover run.
 *
 * Scans fail_count for the largest value; the strict '>' comparison
 * means the lowest-numbered node wins a tie.  If any node failed, a
 * message carrying that node's PNN (a uint32_t) is sent with
 * ctdb_client_send_message() to CTDB_BROADCAST_CONNECTED so the node
 * can be given banning credits.
 */
1599 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1600 struct takeover_callback_data *tcd)
1602 unsigned int max_fails = 0;
1603 uint32_t max_pnn = -1;
1606 for (i = 0; i < tcd->num_nodes; i++) {
1607 if (tcd->fail_count[i] > max_fails) {
1609 max_fails = tcd->fail_count[i];
1613 if (max_fails > 0) {
1618 ("Sending banning credits to %u with fail count %u\n",
1619 max_pnn, max_fails));
1621 data.dptr = (uint8_t *)&max_pnn;
1622 data.dsize = sizeof(uint32_t);
1623 ret = ctdb_client_send_message(ctdb,
1624 CTDB_BROADCAST_CONNECTED,
1629 ("Failed to set banning credits for node %u\n",
1636 * Recalculate the allocation of public IPs to nodes and have the
1637 * nodes host their allocated addresses.
1639 * - Allocate memory for IP allocation state, including per node
1641 * - Populate IP allocation algorithm in IP allocation state
1642 * - Populate local value of tunable NoIPFailback in IP allocation
1643 state - this is really a cluster-wide configuration variable and
1644 only the value from the master node is used
1645 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1646 * connected nodes - this is done separately so tunable values can
1647 * be faked in unit testing
1648 * - Populate NoIPTakeover tunable in IP allocation state
1649 * - Populate NoIPHost in IP allocation state, derived from node flags
1650 * and NoIPHostOnAllDisabled tunable
1651 * - Retrieve and populate known and available IP lists in IP
1653 * - If no available IP addresses then early exit
1654 * - Build list of (known IPs, currently assigned node)
1655 * - Populate list of nodes to force rebalance - internal structure,
1656 * currently no way to fetch, only used by LCP2 for nodes that have
1657 * had new IP addresses added
1658 * - Run IP allocation algorithm
1659 * - Send RELEASE_IP to all nodes for IPs they should not host
1660 * - Send TAKE_IP to all nodes for IPs they should host
1661 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Run one IP takeover pass over the nodes in nodemap.
 *
 * All temporary state hangs off tmp_ctx, a talloc child of ctdb, and
 * the failure paths visible here release it with talloc_free().
 *
 * takeover_run_fail_callback() is installed as the failure callback
 * for the RELEASE_IP, TAKEOVER_IP and IPREALLOCATED phases, and the
 * takeover_data it fills in is passed to
 * takeover_run_process_failures().
 *
 * force_rebalance_nodes is stored in the allocation state before
 * ipalloc() is called.  RELEASE_IP is sent per connected node and per
 * known IP, skipping IPs that the node itself should serve.
 * TAKEOVER_IP is sent to tmp_ip->pnn for each IP whose pnn is not -1.
 * Each control send uses a fresh TAKEOVER_TIMEOUT().
 */
1663 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1664 uint32_t *force_rebalance_nodes)
1667 struct ctdb_public_ip ip;
1669 struct public_ip_list *all_ips, *tmp_ip;
1671 struct timeval timeout;
1672 struct client_async_data *async_data;
1673 struct ctdb_client_control_state *state;
1674 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1675 struct ipalloc_state *ipalloc_state;
1676 struct takeover_callback_data *takeover_data;
1679 /* Initialise fail callback data to be used with
1680 * takeover_run_fail_callback(). A failure in any of the
1681 * following steps will cause an early return, so this can be
1682 * reused for each of those steps without re-initialising. */
1683 takeover_data = takeover_callback_data_init(tmp_ctx,
1685 if (takeover_data == NULL) {
1686 talloc_free(tmp_ctx);
1691 * ip failover is completely disabled, just send out the
1692 * ipreallocated event.
1694 if (ctdb->tunable.disable_ip_failover != 0) {
1698 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1699 if (ipalloc_state == NULL) {
1700 talloc_free(tmp_ctx);
1704 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1705 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1706 talloc_free(tmp_ctx);
1710 /* Fetch known/available public IPs from each active node */
1711 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1713 talloc_free(tmp_ctx);
1717 /* Short-circuit IP allocation if no node has available IPs */
1718 can_host_ips = false;
1719 for (i=0; i < ipalloc_state->num; i++) {
1720 if (ipalloc_state->available_public_ips[i].num != 0) {
1721 can_host_ips = true;
1724 if (!can_host_ips) {
1725 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1729 /* since nodes only know about those public addresses that
1730 can be served by that particular node, no single node has
1731 a full list of all public addresses that exist in the cluster.
1732 Walk over all node structures and create a merged list of
1733 all public addresses that exist in the cluster.
1735 keep the tree of ips around as ctdb->ip_tree
1737 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1738 ipalloc_state->all_ips = all_ips;
1740 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1742 /* Do the IP reassignment calculations */
1743 ipalloc(ipalloc_state);
1745 /* Now tell all nodes to release any public IPs should not
1746 * host. This will be a NOOP on nodes that don't currently
1747 * hold the given IP.
1749 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1750 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1752 async_data->fail_callback = takeover_run_fail_callback;
1753 async_data->callback_data = takeover_data;
1755 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1757 /* Send a RELEASE_IP to all nodes that should not be hosting
1758 * each IP. For each IP, all but one of these will be
1759 * redundant. However, the redundant ones are used to tell
1760 * nodes which node should be hosting the IP so that commands
1761 * like "ctdb ip" can display a particular nodes idea of who
1762 * is hosting what. */
1763 for (i=0;i<nodemap->num;i++) {
1764 /* don't talk to unconnected nodes, but do talk to banned nodes */
1765 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1769 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1770 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1771 /* This node should be serving this
1772 vnn so don't tell it to release the ip
1776 ip.pnn = tmp_ip->pnn;
1777 ip.addr = tmp_ip->addr;
1779 timeout = TAKEOVER_TIMEOUT();
1780 data.dsize = sizeof(ip);
1781 data.dptr = (uint8_t *)&ip;
1782 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1783 0, CTDB_CONTROL_RELEASE_IP, 0,
1786 if (state == NULL) {
1787 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1788 talloc_free(tmp_ctx);
1792 ctdb_client_async_add(async_data, state);
1795 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1797 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1800 talloc_free(async_data);
1803 /* For each IP, send a TAKOVER_IP to the node that should be
1804 * hosting it. Many of these will often be redundant (since
1805 * the allocation won't have changed) but they can be useful
1806 * to recover from inconsistencies. */
1807 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1808 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1810 async_data->fail_callback = takeover_run_fail_callback;
1811 async_data->callback_data = takeover_data;
1813 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1814 if (tmp_ip->pnn == -1) {
1815 /* this IP won't be taken over */
1819 ip.pnn = tmp_ip->pnn;
1820 ip.addr = tmp_ip->addr;
1822 timeout = TAKEOVER_TIMEOUT();
1823 data.dsize = sizeof(ip);
1824 data.dptr = (uint8_t *)&ip;
1825 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1826 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1827 data, async_data, &timeout, NULL);
1828 if (state == NULL) {
1829 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1830 talloc_free(tmp_ctx);
1834 ctdb_client_async_add(async_data, state);
1836 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1838 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1844 * Tell all nodes to run eventscripts to process the
1845 * "ipreallocated" event. This can do a lot of things,
1846 * including restarting services to reconfigure them if public
1847 * IPs have moved. Once upon a time this event only used to
1850 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1851 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1852 nodes, 0, TAKEOVER_TIMEOUT(),
1854 NULL, takeover_run_fail_callback,
1858 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1862 talloc_free(tmp_ctx);
1866 takeover_run_process_failures(ctdb, takeover_data);
1867 talloc_free(tmp_ctx);
1873 destroy a ctdb_client_ip structure
/*
 * talloc destructor for a struct ctdb_client_ip: logs the address
 * being dropped and unlinks the entry from ctdb->client_ip_list.
 */
1875 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1877 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1878 ctdb_addr_to_str(&ip->addr),
1879 ntohs(ip->addr.ip.sin_port),
1882 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1887 called by a client to inform us of a TCP connection that it is managing
1888 that should tickled with an ACK when IP takeover is done
1890 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1893 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1894 struct ctdb_connection *tcp_sock = NULL;
1895 struct ctdb_tcp_list *tcp;
1896 struct ctdb_connection t;
1899 struct ctdb_client_ip *ip;
1900 struct ctdb_vnn *vnn;
1901 ctdb_sock_addr addr;
1903 /* If we don't have public IPs, tickles are useless */
1904 if (ctdb->vnn == NULL) {
1908 tcp_sock = (struct ctdb_connection *)indata.dptr;
1910 addr = tcp_sock->src;
1911 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1912 addr = tcp_sock->dst;
1913 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1916 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1917 vnn = find_public_ip_vnn(ctdb, &addr);
1919 switch (addr.sa.sa_family) {
1921 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1922 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1923 ctdb_addr_to_str(&addr)));
1927 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1928 ctdb_addr_to_str(&addr)));
1931 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1937 if (vnn->pnn != ctdb->pnn) {
1938 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1939 ctdb_addr_to_str(&addr),
1940 client_id, client->pid));
1941 /* failing this call will tell smbd to die */
1945 ip = talloc(client, struct ctdb_client_ip);
1946 CTDB_NO_MEMORY(ctdb, ip);
1950 ip->client_id = client_id;
1951 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1952 DLIST_ADD(ctdb->client_ip_list, ip);
1954 tcp = talloc(client, struct ctdb_tcp_list);
1955 CTDB_NO_MEMORY(ctdb, tcp);
1957 tcp->connection.src = tcp_sock->src;
1958 tcp->connection.dst = tcp_sock->dst;
1960 DLIST_ADD(client->tcp_list, tcp);
1962 t.src = tcp_sock->src;
1963 t.dst = tcp_sock->dst;
1965 data.dptr = (uint8_t *)&t;
1966 data.dsize = sizeof(t);
1968 switch (addr.sa.sa_family) {
1970 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1971 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1972 ctdb_addr_to_str(&tcp_sock->src),
1973 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1976 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1977 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1978 ctdb_addr_to_str(&tcp_sock->src),
1979 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1982 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1986 /* tell all nodes about this tcp connection */
1987 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1988 CTDB_CONTROL_TCP_ADD,
1989 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1991 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1999 find a tcp address on a list
2001 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2002 struct ctdb_connection *tcp)
2006 if (array == NULL) {
2010 for (i=0;i<array->num;i++) {
2011 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2012 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2013 return &array->connections[i];
2022 called by a daemon to inform us of a TCP connection that one of its
2023 clients managing that should tickled with an ACK when IP takeover is
2026 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2028 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2029 struct ctdb_tcp_array *tcparray;
2030 struct ctdb_connection tcp;
2031 struct ctdb_vnn *vnn;
2033 /* If we don't have public IPs, tickles are useless */
2034 if (ctdb->vnn == NULL) {
2038 vnn = find_public_ip_vnn(ctdb, &p->dst);
2040 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2041 ctdb_addr_to_str(&p->dst)));
2047 tcparray = vnn->tcp_array;
2049 /* If this is the first tickle */
2050 if (tcparray == NULL) {
2051 tcparray = talloc(vnn, struct ctdb_tcp_array);
2052 CTDB_NO_MEMORY(ctdb, tcparray);
2053 vnn->tcp_array = tcparray;
2056 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2057 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2059 tcparray->connections[tcparray->num].src = p->src;
2060 tcparray->connections[tcparray->num].dst = p->dst;
2063 if (tcp_update_needed) {
2064 vnn->tcp_update_needed = true;
2070 /* Do we already have this tickle ?*/
2073 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2074 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2075 ctdb_addr_to_str(&tcp.dst),
2076 ntohs(tcp.dst.ip.sin_port),
2081 /* A new tickle, we must add it to the array */
2082 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2083 struct ctdb_connection,
2085 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2087 tcparray->connections[tcparray->num].src = p->src;
2088 tcparray->connections[tcparray->num].dst = p->dst;
2091 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2092 ctdb_addr_to_str(&tcp.dst),
2093 ntohs(tcp.dst.ip.sin_port),
2096 if (tcp_update_needed) {
2097 vnn->tcp_update_needed = true;
2104 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2106 struct ctdb_connection *tcpp;
2112 /* if the array is empty we cant remove it
2113 and we don't need to do anything
2115 if (vnn->tcp_array == NULL) {
2116 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2117 ctdb_addr_to_str(&conn->dst),
2118 ntohs(conn->dst.ip.sin_port)));
2123 /* See if we know this connection
2124 if we don't know this connection then we dont need to do anything
2126 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2128 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2129 ctdb_addr_to_str(&conn->dst),
2130 ntohs(conn->dst.ip.sin_port)));
2135 /* We need to remove this entry from the array.
2136 Instead of allocating a new array and copying data to it
2137 we cheat and just copy the last entry in the existing array
2138 to the entry that is to be removed and just shring the
2141 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2142 vnn->tcp_array->num--;
2144 /* If we deleted the last entry we also need to remove the entire array
2146 if (vnn->tcp_array->num == 0) {
2147 talloc_free(vnn->tcp_array);
2148 vnn->tcp_array = NULL;
2151 vnn->tcp_update_needed = true;
2153 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2154 ctdb_addr_to_str(&conn->src),
2155 ntohs(conn->src.ip.sin_port)));
2160 called by a daemon to inform us of a TCP connection that one of its
2161 clients used is no longer needed in the tickle database
/*
 * Control handler: remove one connection from the tickle list of the
 * public address it targets.
 *
 * indata.dptr is interpreted as a struct ctdb_connection.  The vnn is
 * looked up from the connection's destination address, and a failed
 * lookup is logged.  The removal itself is delegated to
 * ctdb_remove_connection().
 */
2163 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2165 struct ctdb_vnn *vnn;
2166 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2168 /* If we don't have public IPs, tickles are useless */
2169 if (ctdb->vnn == NULL) {
2173 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2176 (__location__ " unable to find public address %s\n",
2177 ctdb_addr_to_str(&conn->dst)));
2181 ctdb_remove_connection(vnn, conn);
2188 Called when another daemon starts - causes all tickles for all
2189 public addresses we are serving to be sent to the new node on the
2190 next check. This actually causes the next scheduled call to
2191 tdb_update_tcp_tickles() to update all nodes. This is simple and
2192 doesn't require careful error handling.
2194 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2196 struct ctdb_vnn *vnn;
2198 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2199 (unsigned long) pnn));
2201 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2202 vnn->tcp_update_needed = true;
2210 called when a client structure goes away - hook to remove
2211 elements from the tcp_list in all daemons
/*
 * Drain client->tcp_list when a client goes away.
 *
 * Each entry is unlinked from the list and its public-address vnn is
 * looked up; a failed lookup is logged.  The connection is removed
 * from the vnn's tickle array only when the address is hosted by this
 * node (vnn->pnn == client->ctdb->pnn).
 */
2213 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2215 while (client->tcp_list) {
2216 struct ctdb_vnn *vnn;
2217 struct ctdb_tcp_list *tcp = client->tcp_list;
2218 struct ctdb_connection *conn = &tcp->connection;
2220 DLIST_REMOVE(client->tcp_list, tcp);
2222 vnn = find_public_ip_vnn(client->ctdb,
2226 (__location__ " unable to find public address %s\n",
2227 ctdb_addr_to_str(&conn->dst)));
2231 /* If the IP address is hosted on this node then
2232 * remove the connection. */
2233 if (vnn->pnn == client->ctdb->pnn) {
2234 ctdb_remove_connection(vnn, conn);
2237 /* Otherwise this function has been called because the
2238 * server IP address has been released to another node
2239 * and the client has exited. This means that we
2240 * should not delete the connection information. The
2241 * takeover node processes connections too. */
/*
 * Release the public IPs configured on this host.
 *
 * The tunable disable_ip_failover == 1 is checked first.  For each
 * vnn:
 *   - if ctdb_sys_have_ip() reports the address is not present, the
 *     interface assignment is dropped with ctdb_vnn_unassign_iface()
 *   - if update_in_flight is already set, a "Not releasing" warning
 *     is logged
 *   - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP
 *     event script is run with "<iface> <address> <netmask bits>",
 *     the address string is sent to this node as a
 *     CTDB_SRVID_RELEASE_IP message, the interface is unassigned and
 *     update_in_flight is cleared again.
 *
 * The number of released IPs is logged at the end.
 */
2246 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2248 struct ctdb_vnn *vnn;
2252 if (ctdb->tunable.disable_ip_failover == 1) {
2256 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2257 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2258 ctdb_vnn_unassign_iface(ctdb, vnn);
2265 /* Don't allow multiple releases at once. Some code,
2266 * particularly ctdb_tickle_sentenced_connections() is
2268 if (vnn->update_in_flight) {
2269 DEBUG(DEBUG_WARNING,
2271 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2272 ctdb_addr_to_str(&vnn->public_address),
2273 vnn->public_netmask_bits,
2274 ctdb_vnn_iface_string(vnn)));
2277 vnn->update_in_flight = true;
2279 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2280 ctdb_addr_to_str(&vnn->public_address),
2281 vnn->public_netmask_bits,
2282 ctdb_vnn_iface_string(vnn)));
2284 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2285 ctdb_vnn_iface_string(vnn),
2286 ctdb_addr_to_str(&vnn->public_address),
2287 vnn->public_netmask_bits);
2289 data.dptr = (uint8_t *)talloc_strdup(
2290 vnn, ctdb_addr_to_str(&vnn->public_address));
2291 if (data.dptr != NULL) {
2292 data.dsize = strlen((char *)data.dptr) + 1;
2293 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2294 CTDB_SRVID_RELEASE_IP, data);
2295 talloc_free(data.dptr);
2298 ctdb_vnn_unassign_iface(ctdb, vnn);
2299 vnn->update_in_flight = false;
2303 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2308 get list of public IPs
2310 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2311 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2314 struct ctdb_public_ip_list_old *ips;
2315 struct ctdb_vnn *vnn;
2316 bool only_available = false;
2318 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2319 only_available = true;
2322 /* count how many public ip structures we have */
2324 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2328 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2329 num*sizeof(struct ctdb_public_ip);
2330 ips = talloc_zero_size(outdata, len);
2331 CTDB_NO_MEMORY(ctdb, ips);
2334 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2335 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2338 ips->ips[i].pnn = vnn->pnn;
2339 ips->ips[i].addr = vnn->public_address;
2343 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2344 i*sizeof(struct ctdb_public_ip);
2346 outdata->dsize = len;
2347 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: describe one public address and its interfaces.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr; a failed vnn lookup
 * is logged.  The reply is a struct ctdb_public_ip_info_old allocated
 * as a talloc child of outdata, holding the address, its pnn and one
 * ctdb_iface entry (name, link_state, references) per name in the
 * NULL-terminated vnn->ifaces list.
 *
 * active_idx starts as 0xFFFFFFFF and is set to the index of the
 * entry whose interface equals vnn->iface.  Names are copied with
 * strncpy() and then explicitly NUL-terminated.
 */
2353 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2354 struct ctdb_req_control_old *c,
2359 ctdb_sock_addr *addr;
2360 struct ctdb_public_ip_info_old *info;
2361 struct ctdb_vnn *vnn;
2363 addr = (ctdb_sock_addr *)indata.dptr;
2365 vnn = find_public_ip_vnn(ctdb, addr);
2367 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2368 "'%s'not a public address\n",
2369 ctdb_addr_to_str(addr)));
2373 /* count how many public ip structures we have */
2375 for (;vnn->ifaces[num];) {
2379 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2380 num*sizeof(struct ctdb_iface);
2381 info = talloc_zero_size(outdata, len);
2382 CTDB_NO_MEMORY(ctdb, info);
2384 info->ip.addr = vnn->public_address;
2385 info->ip.pnn = vnn->pnn;
2386 info->active_idx = 0xFFFFFFFF;
2388 for (i=0; vnn->ifaces[i]; i++) {
2389 struct ctdb_interface *cur;
2391 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2393 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2397 if (vnn->iface == cur) {
2398 info->active_idx = i;
2400 strncpy(info->ifaces[i].name, cur->name,
2401 sizeof(info->ifaces[i].name));
2402 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2403 info->ifaces[i].link_state = cur->link_up;
2404 info->ifaces[i].references = cur->references;
2407 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2408 i*sizeof(struct ctdb_iface);
2410 outdata->dsize = len;
2411 outdata->dptr = (uint8_t *)info;
2416 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2417 struct ctdb_req_control_old *c,
2421 struct ctdb_iface_list_old *ifaces;
2422 struct ctdb_interface *cur;
2424 /* count how many public ip structures we have */
2426 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2430 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2431 num*sizeof(struct ctdb_iface);
2432 ifaces = talloc_zero_size(outdata, len);
2433 CTDB_NO_MEMORY(ctdb, ifaces);
2436 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2437 strncpy(ifaces->ifaces[i].name, cur->name,
2438 sizeof(ifaces->ifaces[i].name));
2439 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2440 ifaces->ifaces[i].link_state = cur->link_up;
2441 ifaces->ifaces[i].references = cur->references;
2445 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2446 i*sizeof(struct ctdb_iface);
2448 outdata->dsize = len;
2449 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 *
 * indata.dptr is interpreted as a struct ctdb_iface.  The request is
 * validated before use: name must hold a NUL at index
 * CTDB_IFACE_SIZE, link_state must be a value the switch accepts, and
 * references must be 0.  The interface is then looked up by name with
 * ctdb_find_iface().  The change is logged and stored in
 * iface->link_up.
 */
2454 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2455 struct ctdb_req_control_old *c,
2458 struct ctdb_iface *info;
2459 struct ctdb_interface *iface;
2460 bool link_up = false;
2462 info = (struct ctdb_iface *)indata.dptr;
2464 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2465 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2466 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2467 len, len, info->name));
2471 switch (info->link_state) {
2479 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2480 (unsigned int)info->link_state));
2484 if (info->references != 0) {
2485 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2486 (unsigned int)info->references));
2490 iface = ctdb_find_iface(ctdb, info->name);
2491 if (iface == NULL) {
2495 if (link_up == iface->link_up) {
2499 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2500 ("iface[%s] has changed it's link status %s => %s\n",
2502 iface->link_up?"up":"down",
2503 link_up?"up":"down"));
2505 iface->link_up = link_up;
2511 called by a daemon to inform us of the entire list of TCP tickles for
2512 a particular public address.
2513 this control should only be sent by the node that is currently serving
2514 that public address.
/*
 * Control handler: replace the tickle list stored for one public
 * address with the list carried in indata.
 *
 * indata must be large enough for the ctdb_tickle_list_old header and
 * for list->num connections.  The "redundant" message is logged when
 * this node itself hosts the address (vnn->pnn == ctdb->pnn).
 * Otherwise any existing array is freed and a new one, a talloc child
 * of vnn, is filled by copying list->num connections.
 *
 * NOTE(review): if allocating tcparray->connections fails, tcparray is
 * already parented to vnn but not yet stored in vnn->tcp_array, and it
 * is not freed in the lines visible here.  Confirm this is not a leak.
 */
2516 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2518 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2519 struct ctdb_tcp_array *tcparray;
2520 struct ctdb_vnn *vnn;
2522 /* We must at least have tickles.num or else we cant verify the size
2523 of the received data blob
2525 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2526 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2530 /* verify that the size of data matches what we expect */
2531 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2532 + sizeof(struct ctdb_connection) * list->num) {
2533 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2537 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2538 ctdb_addr_to_str(&list->addr)));
2540 vnn = find_public_ip_vnn(ctdb, &list->addr);
2542 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2543 ctdb_addr_to_str(&list->addr)));
2548 if (vnn->pnn == ctdb->pnn) {
2550 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2551 ctdb_addr_to_str(&list->addr)));
2555 /* remove any old ticklelist we might have */
2556 talloc_free(vnn->tcp_array);
2557 vnn->tcp_array = NULL;
2559 tcparray = talloc(vnn, struct ctdb_tcp_array);
2560 CTDB_NO_MEMORY(ctdb, tcparray);
2562 tcparray->num = list->num;
2564 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2565 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2567 memcpy(tcparray->connections, &list->connections[0],
2568 sizeof(struct ctdb_connection)*tcparray->num);
2570 /* We now have a new fresh tickle list array for this vnn */
2571 vnn->tcp_array = tcparray;
2577 called to return the full list of tickles for the public address associated
2578 with the provided vnn
/*
 * Control handler: return the tickle list recorded for one public
 * address.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr; a failed vnn lookup
 * is logged.  The port taken from addr acts as a filter: connections
 * are matched on the port of their destination address.  The reply is
 * a struct ctdb_tickle_list_old allocated as a talloc child of
 * outdata and sized for the number of connections counted.
 *
 * NOTE(review): presumably the "All connections" case applies when the
 * requested port is 0.  Confirm against the full source.
 */
2580 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2582 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2583 struct ctdb_tickle_list_old *list;
2584 struct ctdb_tcp_array *tcparray;
2586 struct ctdb_vnn *vnn;
2589 vnn = find_public_ip_vnn(ctdb, addr);
2591 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2592 ctdb_addr_to_str(addr)));
2597 port = ctdb_addr_to_port(addr);
2599 tcparray = vnn->tcp_array;
2601 if (tcparray != NULL) {
2603 /* All connections */
2604 num = tcparray->num;
2606 /* Count connections for port */
2607 for (i = 0; i < tcparray->num; i++) {
2608 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2615 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2616 + sizeof(struct ctdb_connection) * num;
2618 outdata->dptr = talloc_size(outdata, outdata->dsize);
2619 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2620 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2630 for (i = 0; i < tcparray->num; i++) {
2632 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2633 list->connections[num] = tcparray->connections[i];
2643 set the list of all tcp tickles for a public address
2645 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2646 ctdb_sock_addr *addr,
2647 struct ctdb_tcp_array *tcparray)
2651 struct ctdb_tickle_list_old *list;
2654 num = tcparray->num;
2659 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2660 sizeof(struct ctdb_connection) * num;
2661 data.dptr = talloc_size(ctdb, data.dsize);
2662 CTDB_NO_MEMORY(ctdb, data.dptr);
2664 list = (struct ctdb_tickle_list_old *)data.dptr;
2668 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2671 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2672 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2673 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2675 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2679 talloc_free(data.dptr);
2686 perform tickle updates if required
/*
 * Timer handler: push out pending tickle-list updates.
 *
 * private_data is the ctdb context.  Each vnn is checked against
 * ctdb->pnn and its tcp_update_needed flag; its tickle list is sent
 * with ctdb_send_set_tcp_tickles_for_ip().  A failed send is logged;
 * tcp_update_needed is cleared alongside the "Sent tickle update"
 * message.
 *
 * The handler re-arms itself on ctdb->tickle_update_context to fire
 * again after tunable tickle_update_interval seconds.
 */
2688 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2689 struct tevent_timer *te,
2690 struct timeval t, void *private_data)
2692 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2694 struct ctdb_vnn *vnn;
2696 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2697 /* we only send out updates for public addresses that
2700 if (ctdb->pnn != vnn->pnn) {
2703 /* We only send out the updates if we need to */
2704 if (!vnn->tcp_update_needed) {
2707 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2708 &vnn->public_address,
2711 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2712 ctdb_addr_to_str(&vnn->public_address)));
2715 ("Sent tickle update for public address %s\n",
2716 ctdb_addr_to_str(&vnn->public_address)));
2717 vnn->tcp_update_needed = false;
2721 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2722 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2723 ctdb_update_tcp_tickles, ctdb);
2727 start periodic update of tcp tickles
2729 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2731 ctdb->tickle_update_context = talloc_new(ctdb);
2733 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2734 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2735 ctdb_update_tcp_tickles, ctdb);
/*
 * State for the gratuitous ARP timer started by
 * ctdb_control_send_gratious_arp().
 */
2741 struct control_gratious_arp {
/* Daemon context; its event loop (ctdb->ev) runs the ARP timer. */
2742 struct ctdb_context *ctdb;
/* Address handed to ctdb_sys_send_arp(). */
2743 ctdb_sock_addr addr;
2749 send a control_gratuitous arp
/*
 * Timer handler: send one gratuitous ARP.
 *
 * private_data is a struct control_gratious_arp.  The ARP for
 * arp->addr is sent on arp->iface with ctdb_sys_send_arp(); a failure
 * is logged together with strerror(errno).  arp->count is compared
 * with CTDB_ARP_REPEAT.  The handler re-arms itself
 * CTDB_ARP_INTERVAL seconds later, with the timer allocated on arp.
 */
2751 static void send_gratious_arp(struct tevent_context *ev,
2752 struct tevent_timer *te,
2753 struct timeval t, void *private_data)
2756 struct control_gratious_arp *arp = talloc_get_type(private_data,
2757 struct control_gratious_arp);
2759 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2761 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2762 arp->iface, strerror(errno)));
2767 if (arp->count == CTDB_ARP_REPEAT) {
2772 tevent_add_timer(arp->ctdb->ev, arp,
2773 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2774 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs for an address.
 *
 * indata carries a struct ctdb_addr_info_old. Its size is first checked
 * against offsetof(struct ctdb_addr_info_old, iface); a second check (its
 * first line is missing from this listing) reports a mismatch with that
 * offset plus gratious_arp->len. Both failures are logged at DEBUG_ERR.
 * A struct control_gratious_arp is then allocated on ctdb, filled with the
 * address and a talloc copy of the interface name, and send_gratious_arp()
 * is scheduled with timeval_zero().
 *
 * NOTE(review): talloc_strdup() treats gratious_arp->iface as a
 * NUL-terminated string, while the visible checks only cover the length;
 * confirm that termination within len bytes is guaranteed.
 * NOTE(review): the return statements and the initialisation of arp->ctdb
 * and arp->count are not visible in this listing.
 */
2781 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2783 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2784 struct control_gratious_arp *arp;
2786 /* verify the size of indata */
2787 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2788 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2789 (unsigned)indata.dsize,
2790 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2794 ( offsetof(struct ctdb_addr_info_old, iface)
2795 + gratious_arp->len ) ){
2797 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2798 "but should be %u bytes\n",
2799 (unsigned)indata.dsize,
2800 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2805 arp = talloc(ctdb, struct control_gratious_arp);
2806 CTDB_NO_MEMORY(ctdb, arp);
2809 arp->addr = gratious_arp->addr;
2810 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2811 CTDB_NO_MEMORY(ctdb, arp->iface);
2814 tevent_add_timer(arp->ctdb->ev, arp,
2815 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address to this node.
 *
 * indata carries a struct ctdb_addr_info_old. Its size is checked against
 * offsetof(struct ctdb_addr_info_old, iface) and a "Wrong size" error
 * reports the size implied by pub->len. The address is logged at
 * DEBUG_NOTICE and passed, together with pub->mask and pub->iface, to
 * ctdb_add_public_address(); "Failed to add public address" is logged at
 * DEBUG_ERR on the failure path.
 *
 * NOTE(review): the second size condition, the test on ret and the return
 * statements are not visible in this listing.
 */
2820 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2822 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2825 /* verify the size of indata */
2826 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2827 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2831 ( offsetof(struct ctdb_addr_info_old, iface)
2834 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2835 "but should be %u bytes\n",
2836 (unsigned)indata.dsize,
2837 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2841 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2843 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2846 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * State handed to delete_ip_callback(): c is the control request that the
 * callback answers.
 */
2853 struct delete_ip_callback_state {
2854 struct ctdb_req_control_old *c;
2858 called when releaseip event finishes for del_public_address
/*
 * Completion callback used by the delete-public-address control.
 *
 * private_data is the struct delete_ip_callback_state. The pending control
 * state->c is answered with the received status and errormsg, after which
 * private_data (the state) is freed. data is not referenced in the lines
 * visible here.
 */
2860 static void delete_ip_callback(struct ctdb_context *ctdb,
2861 int32_t status, TDB_DATA data,
2862 const char *errormsg,
2865 struct delete_ip_callback_state *state =
2866 talloc_get_type(private_data, struct delete_ip_callback_state);
2868 /* If release failed then fail. */
2869 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2870 talloc_free(private_data);
/*
 * Control handler: remove a public address from this node.
 *
 * indata carries a struct ctdb_addr_info_old whose size is validated
 * before use. ctdb->vnn is searched with ctdb_same_ip() for an entry
 * matching pub->addr:
 *  - if vnn->pnn == ctdb->pnn, vnn->delete_pending is set, a
 *    struct delete_ip_callback_state and a struct ctdb_public_ip are
 *    allocated, CTDB_CONTROL_RELEASE_IP is sent through
 *    ctdb_daemon_send_control(), the request c is stolen onto the state
 *    and *async_reply is set to true;
 *  - otherwise the address is not hosted on this node and do_delete_ip()
 *    is called directly.
 * "Delete IP of unknown public IP address" is logged at DEBUG_ERR when no
 * entry matched.
 *
 * NOTE(review): vnn->delete_pending is set before the allocations and the
 * send; the error paths are not visible in this listing, so confirm that
 * the flag cannot be left set when the release is never issued.
 * NOTE(review): return statements and several argument lines of the
 * calls are missing from this listing.
 */
2873 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2874 struct ctdb_req_control_old *c,
2875 TDB_DATA indata, bool *async_reply)
2877 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2878 struct ctdb_vnn *vnn;
2880 /* verify the size of indata */
2881 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2882 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2886 ( offsetof(struct ctdb_addr_info_old, iface)
2889 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2890 "but should be %u bytes\n",
2891 (unsigned)indata.dsize,
2892 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2896 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2898 /* walk over all public addresses until we find a match */
2899 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2900 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2901 if (vnn->pnn == ctdb->pnn) {
2902 struct delete_ip_callback_state *state;
2903 struct ctdb_public_ip *ip;
2907 vnn->delete_pending = true;
2909 state = talloc(ctdb,
2910 struct delete_ip_callback_state);
2911 CTDB_NO_MEMORY(ctdb, state);
2914 ip = talloc(state, struct ctdb_public_ip);
2917 (__location__ " Out of memory\n"));
2922 ip->addr = pub->addr;
2924 data.dsize = sizeof(struct ctdb_public_ip);
2925 data.dptr = (unsigned char *)ip;
2927 ret = ctdb_daemon_send_control(ctdb,
2930 CTDB_CONTROL_RELEASE_IP,
2937 (__location__ "Unable to send "
2938 "CTDB_CONTROL_RELEASE_IP\n"));
2943 state->c = talloc_steal(state, c);
2944 *async_reply = true;
2946 /* This IP is not hosted on the
2947 * current node so just delete it
2949 do_delete_ip(ctdb, vnn);
2956 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2957 ctdb_addr_to_str(&pub->addr)));
/*
 * State handed to ctdb_ipreallocated_callback(): c is the control request
 * that the callback answers.
 */
2962 struct ipreallocated_callback_state {
2963 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script.
 *
 * p is the struct ipreallocated_callback_state. A script failure is logged
 * together with the status; when status == -ETIME ctdb_ban_self() is
 * called. The pending control state->c is answered with status.
 *
 * NOTE(review): the condition guarding the failure log and any release of
 * state are not visible in this listing.
 */
2966 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2967 int status, void *p)
2969 struct ipreallocated_callback_state *state =
2970 talloc_get_type(p, struct ipreallocated_callback_state);
2974 (" \"ipreallocated\" event script failed (status %d)\n",
2976 if (status == -ETIME) {
2977 ctdb_ban_self(ctdb);
2981 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2985 /* A control to run the ipreallocated event */
/*
 * Control handler that runs the "ipreallocated" event.
 *
 * Allocates a struct ipreallocated_callback_state on ctdb and starts the
 * CTDB_EVENT_IPREALLOCATED event script through
 * ctdb_event_script_callback(), with ctdb_ipreallocated_callback() as the
 * completion callback and the state as both talloc parent and callback
 * argument. A failure to start the script is logged at DEBUG_ERR.
 * Otherwise the request c is stolen onto the state and *async_reply is set
 * to true, so the reply is sent from the callback.
 *
 * NOTE(review): the test on ret and the return statements are not visible
 * in this listing.
 */
2986 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2987 struct ctdb_req_control_old *c,
2991 struct ipreallocated_callback_state *state;
2993 state = talloc(ctdb, struct ipreallocated_callback_state);
2994 CTDB_NO_MEMORY(ctdb, state);
2996 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2998 ret = ctdb_event_script_callback(ctdb, state,
2999 ctdb_ipreallocated_callback, state,
3000 CTDB_EVENT_IPREALLOCATED,
3004 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3009 /* tell the control that we will reply asynchronously */
3010 state->c = talloc_steal(state, c);
3011 *async_reply = true;
3017 /* This function is called from the recovery daemon to verify that a remote
3018 node has the expected ip allocation.
3019 This is verified against ctdb->ip_tree
/*
 * Compare the public IP list reported by a node with ctdb->ip_tree.
 *
 * When ctdb->ip_tree is NULL the expected allocation is not known yet and
 * the check is skipped. Otherwise each ips->ip[i].addr is looked up in the
 * tree with trbt_lookuparray32(): an address missing from the tree is
 * logged as "new or unknown", entries where either side has pnn == -1 are
 * handled separately, and a differing pnn is logged as an inconsistent IP
 * allocation.
 *
 * NOTE(review): the remaining parameter(s), the branch bodies and the
 * return values are not visible in this listing.
 */
3021 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3022 struct ctdb_public_ip_list *ips,
3025 struct public_ip_list *tmp_ip;
3028 if (ctdb->ip_tree == NULL) {
3029 /* don't know the expected allocation yet, assume remote node
3038 for (i=0; i<ips->num; i++) {
3039 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3040 if (tmp_ip == NULL) {
3041 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3045 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3049 if (tmp_ip->pnn != ips->ip[i].pnn) {
3051 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3053 ctdb_addr_to_str(&ips->ip[i].addr),
3054 ips->ip[i].pnn, tmp_ip->pnn));
/*
 * Record in ctdb->ip_tree that ip->addr is now assigned to ip->pnn.
 *
 * ctdb->tunable.disable_ip_failover is checked first, since the tree is
 * never built when it is set. An error is logged if ctdb->ip_tree is NULL
 * or holds no record for the address; otherwise the change is logged at
 * DEBUG_NOTICE and ip->pnn is stored in the record.
 *
 * NOTE(review): the return values of the individual paths are not visible
 * in this listing.
 */
3062 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3064 struct public_ip_list *tmp_ip;
3066 /* IP tree is never built if DisableIPFailover is set */
3067 if (ctdb->tunable.disable_ip_failover != 0) {
3071 if (ctdb->ip_tree == NULL) {
3072 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3076 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3077 if (tmp_ip == NULL) {
3078 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3082 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3083 tmp_ip->pnn = ip->pnn;
/*
 * Drop the IP assignment tree by passing ctdb->ip_tree to TALLOC_FREE().
 */
3088 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3090 TALLOC_FREE(ctdb->ip_tree);
/*
 * Bookkeeping for one in-flight "reload public IPs" request: the owning
 * context, the control request to answer (c) and the tevent fd handler
 * watching the child's pipe (fde).
 *
 * NOTE(review): the reloadips functions also use h->status, h->child and
 * h->fd[], whose declarations are not visible in this listing.
 */
3093 struct ctdb_reloadips_handle {
3094 struct ctdb_context *ctdb;
3095 struct ctdb_req_control_old *c;
3099 struct tevent_fd *fde;
/*
 * talloc destructor for a struct ctdb_reloadips_handle.
 *
 * Clears ctdb->reload_ips if it still points at h, answers the pending
 * control h->c with h->status, and sends SIGKILL to the child h->child
 * through ctdb_kill().
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
3102 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3104 if (h == h->ctdb->reload_ips) {
3105 h->ctdb->reload_ips = NULL;
3108 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3111 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer handler for a reloadips request that takes too long.
 * private_data is the struct ctdb_reloadips_handle.
 *
 * NOTE(review): the statement(s) acting on h are not present in this
 * listing; presumably the handle is freed so that its destructor replies
 * to the control and kills the child - confirm.
 */
3115 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3116 struct tevent_timer *te,
3117 struct timeval t, void *private_data)
3119 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd handler for the read end of the reloadips pipe.
 * private_data is the struct ctdb_reloadips_handle.
 *
 * Reads one byte from h->fd[0] with sys_read(); a short read or a non-zero
 * byte is logged at DEBUG_ERR as a child error.
 *
 * NOTE(review): how h->status is set and how h is released are not visible
 * in this listing.
 */
3124 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3125 struct tevent_fd *fde,
3126 uint16_t flags, void *private_data)
3128 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3133 ret = sys_read(h->fd[0], &res, 1);
3134 if (ret < 1 || res != 0) {
3135 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Work done in the reloadips child process: reconcile the public IPs known
 * to the local node with the public addresses file.
 *
 * Steps visible here:
 *  1. fetch the node's current public IPs with ctdb_ctrl_get_public_ips()
 *     (CTDB_CURRENT_NODE, TAKEOVER_TIMEOUT());
 *  2. re-read the addresses file with ctdb_set_public_addresses(); the
 *     loops below then treat ctdb->vnn as the file's content;
 *  3. for every IP the node reports that has no match in ctdb->vnn, send
 *     CTDB_CONTROL_DEL_PUBLIC_IP to the local node;
 *  4. for every ctdb->vnn entry the node does not report, a
 *     CTDB_SRVID_REBALANCE_NODE message carrying the local pnn is
 *     broadcast and CTDB_CONTROL_ADD_PUBLIC_IP is sent with the
 *     comma-joined interface names;
 *  5. wait for the queued controls with ctdb_client_async_wait().
 * Temporary allocations hang off mem_ctx; talloc_free(mem_ctx) appears on
 * the error and final exit paths visible here.
 *
 * NOTE(review): the result of talloc_asprintf() used to join interface
 * names is passed to strlen() without a visible NULL check, and the joined
 * string is allocated on vnn rather than on mem_ctx - confirm this is
 * intended.
 * NOTE(review): many short lines (declarations, loop exits, tests on ret,
 * return statements) are missing from this listing, so the exact control
 * flow should be confirmed against the complete source.
 */
3143 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3145 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3146 struct ctdb_public_ip_list_old *ips;
3147 struct ctdb_vnn *vnn;
3148 struct client_async_data *async_data;
3149 struct timeval timeout;
3151 struct ctdb_client_control_state *state;
3155 CTDB_NO_MEMORY(ctdb, mem_ctx);
3157 /* Read IPs from local node */
3158 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3159 CTDB_CURRENT_NODE, mem_ctx, &ips);
3162 ("Unable to fetch public IPs from local node\n"));
3163 talloc_free(mem_ctx);
3167 /* Read IPs file - this is safe since this is a child process */
3169 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3170 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3171 talloc_free(mem_ctx);
3175 async_data = talloc_zero(mem_ctx, struct client_async_data);
3176 CTDB_NO_MEMORY(ctdb, async_data);
3178 /* Compare IPs between node and file for IPs to be deleted */
3179 for (i = 0; i < ips->num; i++) {
3181 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3182 if (ctdb_same_ip(&vnn->public_address,
3183 &ips->ips[i].addr)) {
3184 /* IP is still in file */
3190 /* Delete IP ips->ips[i] */
3191 struct ctdb_addr_info_old *pub;
3194 ("IP %s no longer configured, deleting it\n",
3195 ctdb_addr_to_str(&ips->ips[i].addr)));
3197 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3198 CTDB_NO_MEMORY(ctdb, pub);
3200 pub->addr = ips->ips[i].addr;
3204 timeout = TAKEOVER_TIMEOUT();
3206 data.dsize = offsetof(struct ctdb_addr_info_old,
3208 data.dptr = (uint8_t *)pub;
3210 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3211 CTDB_CONTROL_DEL_PUBLIC_IP,
3212 0, data, async_data,
3214 if (state == NULL) {
3217 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3221 ctdb_client_async_add(async_data, state);
3225 /* Compare IPs between node and file for IPs to be added */
3227 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3228 for (i = 0; i < ips->num; i++) {
3229 if (ctdb_same_ip(&vnn->public_address,
3230 &ips->ips[i].addr)) {
3231 /* IP already on node */
3235 if (i == ips->num) {
3236 /* Add IP ips->ips[i] */
3237 struct ctdb_addr_info_old *pub;
3238 const char *ifaces = NULL;
3243 ("New IP %s configured, adding it\n",
3244 ctdb_addr_to_str(&vnn->public_address)));
3246 uint32_t pnn = ctdb_get_pnn(ctdb);
3248 data.dsize = sizeof(pnn);
3249 data.dptr = (uint8_t *)&pnn;
3251 ret = ctdb_client_send_message(
3253 CTDB_BROADCAST_CONNECTED,
3254 CTDB_SRVID_REBALANCE_NODE,
3257 DEBUG(DEBUG_WARNING,
3258 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3264 ifaces = vnn->ifaces[0];
3266 while (vnn->ifaces[iface] != NULL) {
3267 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3268 vnn->ifaces[iface]);
3272 len = strlen(ifaces) + 1;
3273 pub = talloc_zero_size(mem_ctx,
3274 offsetof(struct ctdb_addr_info_old, iface) + len);
3275 CTDB_NO_MEMORY(ctdb, pub);
3277 pub->addr = vnn->public_address;
3278 pub->mask = vnn->public_netmask_bits;
3280 memcpy(&pub->iface[0], ifaces, pub->len);
3282 timeout = TAKEOVER_TIMEOUT();
3284 data.dsize = offsetof(struct ctdb_addr_info_old,
3286 data.dptr = (uint8_t *)pub;
3288 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3289 CTDB_CONTROL_ADD_PUBLIC_IP,
3290 0, data, async_data,
3292 if (state == NULL) {
3295 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3299 ctdb_client_async_add(async_data, state);
3303 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3304 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3308 talloc_free(mem_ctx);
3312 talloc_free(mem_ctx);
3316 /* This control is sent to force the node to re-read the public addresses file
3317 and drop any addresses we should no longer host, and add new addresses
3318 that we are now able to host
/*
 * Control handler: reload the public addresses in a child process.
 *
 * Any previous ctdb->reload_ips handle is freed first. A new
 * struct ctdb_reloadips_handle is allocated on ctdb, a pipe is created and
 * a child is forked with ctdb_fork().
 *
 * Child (h->child == 0): switches to client mode with
 * switch_from_server_to_client(), runs ctdb_reloadips_child(), writes the
 * one-byte result to h->fd[1] and calls ctdb_wait_for_process_to_exit()
 * on the parent's pid.
 *
 * Parent: steals c onto the handle, marks h->fd[0] close-on-exec, installs
 * ctdb_reloadips_destructor(), watches h->fd[0] with
 * ctdb_reloadips_child_handler(), arms a 120 second
 * ctdb_reloadips_timeout_event() timer and sets *async_reply to true.
 *
 * NOTE(review): the pipe() failure message mentions "ctdb_freeze_lock",
 * which looks copied from another function - confirm and correct the text.
 * NOTE(review): the result of sys_write() is not checked on the visible
 * line, so a failed write would only surface through the parent's short
 * read or timeout.
 * NOTE(review): handle initialisation, the close() calls on the pipe ends,
 * the error-path cleanup and the return statements are not visible in this
 * listing.
 */
3320 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3322 struct ctdb_reloadips_handle *h;
3323 pid_t parent = getpid();
3325 if (ctdb->reload_ips != NULL) {
3326 talloc_free(ctdb->reload_ips);
3327 ctdb->reload_ips = NULL;
3330 h = talloc(ctdb, struct ctdb_reloadips_handle);
3331 CTDB_NO_MEMORY(ctdb, h);
3336 if (pipe(h->fd) == -1) {
3337 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3342 h->child = ctdb_fork(ctdb);
3343 if (h->child == (pid_t)-1) {
3344 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3352 if (h->child == 0) {
3353 signed char res = 0;
3356 debug_extra = talloc_asprintf(NULL, "reloadips:");
3358 prctl_set_comment("ctdb_reloadips");
3359 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3360 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3363 res = ctdb_reloadips_child(ctdb);
3365 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3369 sys_write(h->fd[1], &res, 1);
3370 ctdb_wait_for_process_to_exit(parent);
3374 h->c = talloc_steal(h, c);
3377 set_close_on_exec(h->fd[0]);
3379 talloc_set_destructor(h, ctdb_reloadips_destructor);
3382 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3383 ctdb_reloadips_child_handler, (void *)h);
3384 tevent_fd_set_auto_close(h->fde);
3386 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3387 ctdb_reloadips_timeout_event, h);
3389 /* we reply later */
3390 *async_reply = true;