4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn once) and then walking
118 * ctdb->ifaces once and deleting those not in the tree. Let's go to
119 * one of those if the naive implementation causes problems... :-)
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122 struct ctdb_vnn *vnn)
124 struct ctdb_interface *i, *next;
126 /* For each interface, check if there's an IP using it. */
127 for (i = ctdb->ifaces; i != NULL; i = next) {
132 /* Only consider interfaces named in the given VNN. */
133 if (!vnn_has_interface_with_name(vnn, i->name)) {
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
158 struct ctdb_interface *i;
160 for (i=ctdb->ifaces;i;i=i->next) {
161 if (strcmp(i->name, iface) == 0) {
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170 struct ctdb_vnn *vnn)
173 struct ctdb_interface *cur = NULL;
174 struct ctdb_interface *best = NULL;
176 for (i=0; vnn->ifaces[i]; i++) {
178 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
192 if (cur->references < best->references) {
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202 struct ctdb_vnn *vnn)
204 struct ctdb_interface *best = NULL;
207 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208 "still assigned to iface '%s'\n",
209 ctdb_addr_to_str(&vnn->public_address),
210 ctdb_vnn_iface_string(vnn)));
214 best = ctdb_vnn_best_iface(ctdb, vnn);
216 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217 "cannot assign to iface any iface\n",
218 ctdb_addr_to_str(&vnn->public_address)));
224 vnn->pnn = ctdb->pnn;
226 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227 "now assigned to iface '%s' refs[%d]\n",
228 ctdb_addr_to_str(&vnn->public_address),
229 ctdb_vnn_iface_string(vnn),
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238 "now unassigned (old iface '%s' refs[%d])\n",
239 ctdb_addr_to_str(&vnn->public_address),
240 ctdb_vnn_iface_string(vnn),
241 vnn->iface?vnn->iface->references:0));
243 vnn->iface->references--;
246 if (vnn->pnn == ctdb->pnn) {
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252 struct ctdb_vnn *vnn)
256 /* Nodes that are not RUNNING can not host IPs */
257 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 if (vnn->delete_pending) {
265 if (vnn->iface && vnn->iface->link_up) {
269 for (i=0; vnn->ifaces[i]; i++) {
270 struct ctdb_interface *cur;
272 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
285 struct ctdb_takeover_arp {
286 struct ctdb_context *ctdb;
289 struct ctdb_tcp_array *tcparray;
290 struct ctdb_vnn *vnn;
295 lists of tcp endpoints
297 struct ctdb_tcp_list {
298 struct ctdb_tcp_list *prev, *next;
299 struct ctdb_connection connection;
303 list of clients to kill on IP release
305 struct ctdb_client_ip {
306 struct ctdb_client_ip *prev, *next;
307 struct ctdb_context *ctdb;
314 send a gratuitous arp
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317 struct tevent_timer *te,
318 struct timeval t, void *private_data)
320 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
321 struct ctdb_takeover_arp);
323 struct ctdb_tcp_array *tcparray;
324 const char *iface = ctdb_vnn_iface_string(arp->vnn);
326 ret = ctdb_sys_send_arp(&arp->addr, iface);
328 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329 iface, strerror(errno)));
332 tcparray = arp->tcparray;
334 for (i=0;i<tcparray->num;i++) {
335 struct ctdb_connection *tcon;
337 tcon = &tcparray->connections[i];
338 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339 (unsigned)ntohs(tcon->dst.ip.sin_port),
340 ctdb_addr_to_str(&tcon->src),
341 (unsigned)ntohs(tcon->src.ip.sin_port)));
342 ret = ctdb_sys_send_tcp(
347 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348 ctdb_addr_to_str(&tcon->src)));
355 if (arp->count == CTDB_ARP_REPEAT) {
360 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362 ctdb_control_send_arp, arp);
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366 struct ctdb_vnn *vnn)
368 struct ctdb_takeover_arp *arp;
369 struct ctdb_tcp_array *tcparray;
371 if (!vnn->takeover_ctx) {
372 vnn->takeover_ctx = talloc_new(vnn);
373 if (!vnn->takeover_ctx) {
378 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
384 arp->addr = vnn->public_address;
387 tcparray = vnn->tcp_array;
389 /* add all of the known tcp connections for this IP to the
390 list of tcp connections to send tickle acks for */
391 arp->tcparray = talloc_steal(arp, tcparray);
393 vnn->tcp_array = NULL;
394 vnn->tcp_update_needed = true;
397 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398 timeval_zero(), ctdb_control_send_arp, arp);
403 struct takeover_callback_state {
404 struct ctdb_req_control_old *c;
405 ctdb_sock_addr *addr;
406 struct ctdb_vnn *vnn;
409 struct ctdb_do_takeip_state {
410 struct ctdb_req_control_old *c;
411 struct ctdb_vnn *vnn;
415 called when takeip event finishes
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
420 struct ctdb_do_takeip_state *state =
421 talloc_get_type(private_data, struct ctdb_do_takeip_state);
426 if (status == -ETIME) {
429 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430 ctdb_addr_to_str(&state->vnn->public_address),
431 ctdb_vnn_iface_string(state->vnn)));
432 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
438 if (ctdb->do_checkpublicip) {
440 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
442 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
449 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450 data.dsize = strlen((char *)data.dptr) + 1;
451 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
453 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
456 /* the control succeeded */
457 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
464 state->vnn->update_in_flight = false;
469 take over an ip address
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472 struct ctdb_req_control_old *c,
473 struct ctdb_vnn *vnn)
476 struct ctdb_do_takeip_state *state;
478 if (vnn->update_in_flight) {
479 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480 "update for this IP already in flight\n",
481 ctdb_addr_to_str(&vnn->public_address),
482 vnn->public_netmask_bits));
486 ret = ctdb_vnn_assign_iface(ctdb, vnn);
488 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489 "assign a usable interface\n",
490 ctdb_addr_to_str(&vnn->public_address),
491 vnn->public_netmask_bits));
495 state = talloc(vnn, struct ctdb_do_takeip_state);
496 CTDB_NO_MEMORY(ctdb, state);
498 state->c = talloc_steal(ctdb, c);
501 vnn->update_in_flight = true;
502 talloc_set_destructor(state, ctdb_takeip_destructor);
504 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505 ctdb_addr_to_str(&vnn->public_address),
506 vnn->public_netmask_bits,
507 ctdb_vnn_iface_string(vnn)));
509 ret = ctdb_event_script_callback(ctdb,
511 ctdb_do_takeip_callback,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 ctdb_vnn_iface_string(vnn)));
530 struct ctdb_do_updateip_state {
531 struct ctdb_req_control_old *c;
532 struct ctdb_interface *old;
533 struct ctdb_vnn *vnn;
537 called when updateip event finishes
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
542 struct ctdb_do_updateip_state *state =
543 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547 if (status == -ETIME) {
550 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551 ctdb_addr_to_str(&state->vnn->public_address),
553 ctdb_vnn_iface_string(state->vnn)));
556 * All we can do is reset the old interface
557 * and let the next run fix it
559 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560 state->vnn->iface = state->old;
561 state->vnn->iface->references++;
563 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
568 if (ctdb->do_checkpublicip) {
570 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
572 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
579 /* the control succeeded */
580 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
587 state->vnn->update_in_flight = false;
592 update (move) an ip address
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595 struct ctdb_req_control_old *c,
596 struct ctdb_vnn *vnn)
599 struct ctdb_do_updateip_state *state;
600 struct ctdb_interface *old = vnn->iface;
601 const char *new_name;
603 if (vnn->update_in_flight) {
604 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605 "update for this IP already in flight\n",
606 ctdb_addr_to_str(&vnn->public_address),
607 vnn->public_netmask_bits));
611 ctdb_vnn_unassign_iface(ctdb, vnn);
612 ret = ctdb_vnn_assign_iface(ctdb, vnn);
614 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615 "assin a usable interface (old iface '%s')\n",
616 ctdb_addr_to_str(&vnn->public_address),
617 vnn->public_netmask_bits,
622 new_name = ctdb_vnn_iface_string(vnn);
623 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624 /* A benign update from one interface onto itself.
625 * no need to run the eventscripts in this case, just return
628 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632 state = talloc(vnn, struct ctdb_do_updateip_state);
633 CTDB_NO_MEMORY(ctdb, state);
635 state->c = talloc_steal(ctdb, c);
639 vnn->update_in_flight = true;
640 talloc_set_destructor(state, ctdb_updateip_destructor);
642 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643 "interface %s to %s\n",
644 ctdb_addr_to_str(&vnn->public_address),
645 vnn->public_netmask_bits,
649 ret = ctdb_event_script_callback(ctdb,
651 ctdb_do_updateip_callback,
653 CTDB_EVENT_UPDATE_IP,
657 ctdb_addr_to_str(&vnn->public_address),
658 vnn->public_netmask_bits);
660 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661 ctdb_addr_to_str(&vnn->public_address),
662 old->name, new_name));
671 Find the vnn entry that matches a public ip address
672 returns NULL if the address is not known as a public address
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
676 struct ctdb_vnn *vnn;
678 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679 if (ctdb_same_ip(&vnn->public_address, addr)) {
688 take over an ip address
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691 struct ctdb_req_control_old *c,
696 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697 struct ctdb_vnn *vnn;
698 bool have_ip = false;
699 bool do_updateip = false;
700 bool do_takeip = false;
701 struct ctdb_interface *best_iface = NULL;
703 if (pip->pnn != ctdb->pnn) {
704 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705 "with pnn %d, but we're node %d\n",
706 ctdb_addr_to_str(&pip->addr),
707 pip->pnn, ctdb->pnn));
711 /* update our vnn list */
712 vnn = find_public_ip_vnn(ctdb, &pip->addr);
714 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715 ctdb_addr_to_str(&pip->addr)));
719 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720 have_ip = ctdb_sys_have_ip(&pip->addr);
722 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723 if (best_iface == NULL) {
724 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725 "a usable interface (old %s, have_ip %d)\n",
726 ctdb_addr_to_str(&vnn->public_address),
727 vnn->public_netmask_bits,
728 ctdb_vnn_iface_string(vnn),
733 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
739 if (vnn->iface == NULL && have_ip) {
740 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742 ctdb_addr_to_str(&vnn->public_address)));
746 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748 "and we have it on iface[%s], but it was assigned to node %d"
749 "and we are node %d, banning ourself\n",
750 ctdb_addr_to_str(&vnn->public_address),
751 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
756 if (vnn->pnn == -1 && have_ip) {
757 vnn->pnn = ctdb->pnn;
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we already have it on iface[%s], update local daemon\n",
760 ctdb_addr_to_str(&vnn->public_address),
761 ctdb_vnn_iface_string(vnn)));
766 if (vnn->iface != best_iface) {
767 if (!vnn->iface->link_up) {
769 } else if (vnn->iface->references > (best_iface->references + 1)) {
770 /* only move when the rebalance gains something */
778 ctdb_vnn_unassign_iface(ctdb, vnn);
785 ret = ctdb_do_takeip(ctdb, c, vnn);
789 } else if (do_updateip) {
790 ret = ctdb_do_updateip(ctdb, c, vnn);
796 * The interface is up and the kernel knows the ip
799 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800 ctdb_addr_to_str(&pip->addr),
801 vnn->public_netmask_bits,
802 ctdb_vnn_iface_string(vnn)));
806 /* tell ctdb_control.c that we will be replying asynchronously */
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
814 DLIST_REMOVE(ctdb->vnn, vnn);
815 ctdb_vnn_unassign_iface(ctdb, vnn);
816 ctdb_remove_orphaned_ifaces(ctdb, vnn);
821 called when releaseip event finishes
823 static void release_ip_callback(struct ctdb_context *ctdb, int status,
826 struct takeover_callback_state *state =
827 talloc_get_type(private_data, struct takeover_callback_state);
830 if (status == -ETIME) {
834 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835 if (ctdb_sys_have_ip(state->addr)) {
837 ("IP %s still hosted during release IP callback, failing\n",
838 ctdb_addr_to_str(state->addr)));
839 ctdb_request_control_reply(ctdb, state->c,
846 /* send a message to all clients of this node telling them
847 that the cluster has been reconfigured and they should
848 release any sockets on this IP */
849 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851 data.dsize = strlen((char *)data.dptr)+1;
853 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
855 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
857 ctdb_vnn_unassign_iface(ctdb, state->vnn);
859 /* Process the IP if it has been marked for deletion */
860 if (state->vnn->delete_pending) {
861 do_delete_ip(ctdb, state->vnn);
865 /* the control succeeded */
866 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
872 if (state->vnn != NULL) {
873 state->vnn->update_in_flight = false;
879 release an ip address
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
882 struct ctdb_req_control_old *c,
887 struct takeover_callback_state *state;
888 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889 struct ctdb_vnn *vnn;
892 /* update our vnn list */
893 vnn = find_public_ip_vnn(ctdb, &pip->addr);
895 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896 ctdb_addr_to_str(&pip->addr)));
901 /* stop any previous arps */
902 talloc_free(vnn->takeover_ctx);
903 vnn->takeover_ctx = NULL;
905 /* Some ctdb tool commands (e.g. moveip) send
906 * lazy multicast to drop an IP from any node that isn't the
907 * intended new node. The following makes ctdbd ignore
908 * a release for any address it doesn't host.
910 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911 if (!ctdb_sys_have_ip(&pip->addr)) {
912 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913 ctdb_addr_to_str(&pip->addr),
914 vnn->public_netmask_bits,
915 ctdb_vnn_iface_string(vnn)));
916 ctdb_vnn_unassign_iface(ctdb, vnn);
920 if (vnn->iface == NULL) {
921 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922 ctdb_addr_to_str(&pip->addr),
923 vnn->public_netmask_bits));
928 /* There is a potential race between take_ip and us because we
929 * update the VNN via a callback that runs when the
930 * eventscripts have been run. Avoid the race by allowing one
931 * update to be in flight at a time.
933 if (vnn->update_in_flight) {
934 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935 "update for this IP already in flight\n",
936 ctdb_addr_to_str(&vnn->public_address),
937 vnn->public_netmask_bits));
941 iface = strdup(ctdb_vnn_iface_string(vnn));
943 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
944 ctdb_addr_to_str(&pip->addr),
945 vnn->public_netmask_bits,
949 state = talloc(ctdb, struct takeover_callback_state);
951 ctdb_set_error(ctdb, "Out of memory at %s:%d",
957 state->c = talloc_steal(state, c);
958 state->addr = talloc(state, ctdb_sock_addr);
959 if (state->addr == NULL) {
960 ctdb_set_error(ctdb, "Out of memory at %s:%d",
966 *state->addr = pip->addr;
969 vnn->update_in_flight = true;
970 talloc_set_destructor(state, ctdb_releaseip_destructor);
972 ret = ctdb_event_script_callback(ctdb,
973 state, release_ip_callback, state,
974 CTDB_EVENT_RELEASE_IP,
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits);
981 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982 ctdb_addr_to_str(&pip->addr),
983 ctdb_vnn_iface_string(vnn)));
988 /* tell the control that we will be replying asynchronously */
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994 ctdb_sock_addr *addr,
995 unsigned mask, const char *ifaces,
998 struct ctdb_vnn *vnn;
1005 tmp = strdup(ifaces);
1006 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007 if (!ctdb_sys_check_iface_exists(iface)) {
1008 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1015 /* Verify that we don't have an entry for this ip yet */
1016 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1019 ctdb_addr_to_str(addr)));
1024 /* create a new vnn structure for this ip address */
1025 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028 tmp = talloc_strdup(vnn, ifaces);
1029 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038 vnn->ifaces[num] = NULL;
1039 vnn->public_address = *addr;
1040 vnn->public_netmask_bits = mask;
1042 if (check_address) {
1043 if (ctdb_sys_have_ip(addr)) {
1044 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045 vnn->pnn = ctdb->pnn;
1049 for (i=0; vnn->ifaces[i]; i++) {
1050 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1052 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053 "for public_address[%s]\n",
1054 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1060 DLIST_ADD(ctdb->vnn, vnn);
1066 setup the public address lists from a file
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1074 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075 if (lines == NULL) {
1076 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1079 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083 for (i=0;i<nlines;i++) {
1085 ctdb_sock_addr addr;
1086 const char *addrstr;
1091 while ((*line == ' ') || (*line == '\t')) {
1097 if (strcmp(line, "") == 0) {
1100 tok = strtok(line, " \t");
1102 tok = strtok(NULL, " \t");
1104 if (NULL == ctdb->default_public_interface) {
1105 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1110 ifaces = ctdb->default_public_interface;
1115 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1132 static void *add_ip_callback(void *parm, void *data)
1134 struct public_ip_list *this_ip = parm;
1135 struct public_ip_list *prev_ip = data;
1137 if (prev_ip == NULL) {
1140 if (this_ip->pnn == -1) {
1141 this_ip->pnn = prev_ip->pnn;
1147 static int getips_count_callback(void *param, void *data)
1149 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1152 new_ip->next = *ip_list;
1157 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1158 struct ipalloc_state *ipalloc_state,
1159 struct ctdb_node_map_old *nodemap)
1163 struct ctdb_public_ip_list_old *ip_list;
1165 if (ipalloc_state->num != nodemap->num) {
1168 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1169 ipalloc_state->num, nodemap->num));
1173 for (j=0; j<nodemap->num; j++) {
1174 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1178 /* Retrieve the list of known public IPs from the node */
1179 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1182 ipalloc_state->known_public_ips,
1187 ("Failed to read known public IPs from node: %u\n",
1191 ipalloc_state->known_public_ips[j].num = ip_list->num;
1192 /* This could be copied and freed. However, ip_list
1193 * is allocated off ipalloc_state->known_public_ips,
1194 * so this is a safe hack. This will go away in a
1195 * while anyway... */
1196 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1198 /* Retrieve the list of available public IPs from the node */
1199 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1202 ipalloc_state->available_public_ips,
1203 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1207 ("Failed to read available public IPs from node: %u\n",
1211 ipalloc_state->available_public_ips[j].num = ip_list->num;
1212 /* This could be copied and freed. However, ip_list
1213 * is allocated off ipalloc_state->available_public_ips,
1214 * so this is a safe hack. This will go away in a
1215 * while anyway... */
1216 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1222 static struct public_ip_list *
1223 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1226 struct public_ip_list *ip_list;
1227 struct ctdb_public_ip_list *public_ips;
1229 TALLOC_FREE(ctdb->ip_tree);
1230 ctdb->ip_tree = trbt_create(ctdb, 0);
1232 if (ipalloc_state->known_public_ips == NULL) {
1233 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1237 for (i=0; i < ipalloc_state->num; i++) {
1239 public_ips = &ipalloc_state->known_public_ips[i];
1241 for (j=0; j < public_ips->num; j++) {
1242 struct public_ip_list *tmp_ip;
1244 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1245 if (tmp_ip == NULL) {
1247 (__location__ " out of memory\n"));
1251 /* Do not use information about IP addresses hosted
1252 * on other nodes, it may not be accurate */
1253 if (public_ips->ip[j].pnn == i) {
1254 tmp_ip->pnn = public_ips->ip[j].pnn;
1258 tmp_ip->addr = public_ips->ip[j].addr;
1259 tmp_ip->next = NULL;
1261 trbt_insertarray32_callback(ctdb->ip_tree,
1262 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1269 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1274 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1278 for (i=0;i<nodemap->num;i++) {
1279 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1280 /* Found one completely healthy node */
1288 struct get_tunable_callback_data {
1289 const char *tunable;
1294 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1295 int32_t res, TDB_DATA outdata,
1298 struct get_tunable_callback_data *cd =
1299 (struct get_tunable_callback_data *)callback;
1303 /* Already handled in fail callback */
1307 if (outdata.dsize != sizeof(uint32_t)) {
1308 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1309 cd->tunable, pnn, (int)sizeof(uint32_t),
1310 (int)outdata.dsize));
1315 size = talloc_array_length(cd->out);
1317 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1318 cd->tunable, pnn, size));
1323 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1326 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1327 int32_t res, TDB_DATA outdata,
1330 struct get_tunable_callback_data *cd =
1331 (struct get_tunable_callback_data *)callback;
1336 ("Timed out getting tunable \"%s\" from node %d\n",
1342 DEBUG(DEBUG_WARNING,
1343 ("Tunable \"%s\" not implemented on node %d\n",
1348 ("Unexpected error getting tunable \"%s\" from node %d\n",
1354 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1355 TALLOC_CTX *tmp_ctx,
1356 struct ctdb_node_map_old *nodemap,
1357 const char *tunable,
1358 uint32_t default_value)
1361 struct ctdb_control_get_tunable *t;
1364 struct get_tunable_callback_data callback_data;
1367 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1368 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1369 for (i=0; i<nodemap->num; i++) {
1370 tvals[i] = default_value;
1373 callback_data.out = tvals;
1374 callback_data.tunable = tunable;
1375 callback_data.fatal = false;
1377 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1378 data.dptr = talloc_size(tmp_ctx, data.dsize);
1379 t = (struct ctdb_control_get_tunable *)data.dptr;
1380 t->length = strlen(tunable)+1;
1381 memcpy(t->name, tunable, t->length);
1382 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1383 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1384 nodes, 0, TAKEOVER_TIMEOUT(),
1386 get_tunable_callback,
1387 get_tunable_fail_callback,
1388 &callback_data) != 0) {
1389 if (callback_data.fatal) {
1395 talloc_free(data.dptr);
1400 /* Set internal flags for IP allocation:
1402 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1403 * Set NOIPHOST ip flag for each INACTIVE node
1404 * if all nodes are disabled:
1405 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1407 * Set NOIPHOST ip flags for disabled nodes
1409 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1410 struct ctdb_node_map_old *nodemap,
1411 uint32_t *tval_noiptakeover,
1412 uint32_t *tval_noiphostonalldisabled)
1416 for (i=0;i<nodemap->num;i++) {
1417 /* Can not take IPs on node with NoIPTakeover set */
1418 if (tval_noiptakeover[i] != 0) {
1419 ipalloc_state->noiptakeover[i] = true;
1422 /* Can not host IPs on INACTIVE node */
1423 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1424 ipalloc_state->noiphost[i] = true;
1428 if (all_nodes_are_disabled(nodemap)) {
1429 /* If all nodes are disabled, can not host IPs on node
1430 * with NoIPHostOnAllDisabled set
1432 for (i=0;i<nodemap->num;i++) {
1433 if (tval_noiphostonalldisabled[i] != 0) {
1434 ipalloc_state->noiphost[i] = true;
1438 /* If some nodes are not disabled, then can not host
1439 * IPs on DISABLED node
1441 for (i=0;i<nodemap->num;i++) {
1442 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1443 ipalloc_state->noiphost[i] = true;
1449 static bool set_ipflags(struct ctdb_context *ctdb,
1450 struct ipalloc_state *ipalloc_state,
1451 struct ctdb_node_map_old *nodemap)
1453 uint32_t *tval_noiptakeover;
1454 uint32_t *tval_noiphostonalldisabled;
1456 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1458 if (tval_noiptakeover == NULL) {
1462 tval_noiphostonalldisabled =
1463 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1464 "NoIPHostOnAllDisabled", 0);
1465 if (tval_noiphostonalldisabled == NULL) {
1466 /* Caller frees tmp_ctx */
1470 set_ipflags_internal(ipalloc_state, nodemap,
1472 tval_noiphostonalldisabled);
1474 talloc_free(tval_noiptakeover);
1475 talloc_free(tval_noiphostonalldisabled);
/*
 * Allocate and initialise an ipalloc_state as a talloc child of mem_ctx.
 *
 * - num is taken from ctdb->num_nodes.
 * - known_public_ips, available_public_ips, noiptakeover and noiphost
 *   are zero-initialised arrays of num entries, allocated as talloc
 *   children of the new state.
 * - algorithm is IPALLOC_LCP2 when tunable lcp2_public_ip_assignment
 *   is 1, otherwise IPALLOC_DETERMINISTIC when deterministic_public_ips
 *   is 1, otherwise IPALLOC_NONDETERMINISTIC.
 * - no_ip_failback is copied from the local tunable.
 *
 * Every allocation failure is logged as "Out of memory"; the trailing
 * talloc_free(ipalloc_state) releases the partly built state on the
 * failure path.  Returns the new state on success;
 * ctdb_takeover_run() treats a NULL result as failure.
 */
1480 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1481 TALLOC_CTX *mem_ctx)
1483 struct ipalloc_state *ipalloc_state =
1484 talloc_zero(mem_ctx, struct ipalloc_state);
1485 if (ipalloc_state == NULL) {
1486 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1490 ipalloc_state->num = ctdb->num_nodes;
1492 ipalloc_state->known_public_ips =
1493 talloc_zero_array(ipalloc_state,
1494 struct ctdb_public_ip_list,
1495 ipalloc_state->num);
1496 if (ipalloc_state->known_public_ips == NULL) {
1497 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1501 ipalloc_state->available_public_ips =
1502 talloc_zero_array(ipalloc_state,
1503 struct ctdb_public_ip_list,
1504 ipalloc_state->num);
1505 if (ipalloc_state->available_public_ips == NULL) {
1506 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1509 ipalloc_state->noiptakeover =
1510 talloc_zero_array(ipalloc_state,
1512 ipalloc_state->num);
1513 if (ipalloc_state->noiptakeover == NULL) {
1514 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1517 ipalloc_state->noiphost =
1518 talloc_zero_array(ipalloc_state,
1520 ipalloc_state->num);
1521 if (ipalloc_state->noiphost == NULL) {
1522 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1526 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1527 ipalloc_state->algorithm = IPALLOC_LCP2;
1528 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1529 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1531 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1534 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1536 return ipalloc_state;
1538 talloc_free(ipalloc_state);
/*
 * State shared by the failure callbacks of a takeover run.
 * fail_count is an array of per-node failure counters, indexed by PNN;
 * takeover_callback_data_init() sizes it and records the length in
 * num_nodes.
 */
1542 struct takeover_callback_data {
1544 unsigned int *fail_count;
/*
 * Allocate a zeroed takeover_callback_data on mem_ctx together with a
 * zeroed fail_count array of num_nodes entries (a talloc child of the
 * structure), and record num_nodes in the structure.
 *
 * Allocation failures are logged as "out of memory"; if the array
 * allocation fails the structure itself is freed again.  Returns the
 * initialised structure on success; ctdb_takeover_run() treats a NULL
 * result as failure.
 *
 * NOTE(review): the local pointer is declared static although it is
 * reassigned on every call - this looks unnecessary; confirm and
 * consider dropping the qualifier.
 */
1547 static struct takeover_callback_data *
1548 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1551 static struct takeover_callback_data *takeover_data;
1553 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1554 if (takeover_data == NULL) {
1555 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1559 takeover_data->fail_count = talloc_zero_array(takeover_data,
1560 unsigned int, num_nodes);
1561 if (takeover_data->fail_count == NULL) {
1562 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1563 talloc_free(takeover_data);
1567 takeover_data->num_nodes = num_nodes;
1569 return takeover_data;
/*
 * Failure callback used for the async controls of a takeover run.
 *
 * callback_data must be a struct takeover_callback_data; this is
 * enforced with talloc_get_type_abort().  A node_pnn that is not below
 * cd->num_nodes is logged as an invalid PNN.  Otherwise the per-node
 * counter cd->fail_count[node_pnn] is incremented; the "failed the
 * takeover run" message is logged only while that counter is still
 * zero, i.e. once per node.
 *
 * ctdb, res and outdata are not used by the visible code.
 */
1572 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1573 uint32_t node_pnn, int32_t res,
1574 TDB_DATA outdata, void *callback_data)
1576 struct takeover_callback_data *cd =
1577 talloc_get_type_abort(callback_data,
1578 struct takeover_callback_data);
1580 if (node_pnn >= cd->num_nodes) {
1581 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1585 if (cd->fail_count[node_pnn] == 0) {
1587 ("Node %u failed the takeover run\n", node_pnn));
1590 cd->fail_count[node_pnn]++;
/*
 * Act on the failures recorded in tcd during a takeover run.
 *
 * Scans tcd->fail_count[] for the highest counter.  The comparison is
 * strict, so on a tie the earlier entry is kept.  If at least one
 * failure was recorded, the PNN of the worst node is packed into a
 * message (sizeof(uint32_t) bytes) and broadcast to all connected
 * nodes with ctdb_client_send_message(); the log messages describe
 * this as sending banning credits.  A failure to send is logged.
 *
 * max_pnn starts as -1 converted to uint32_t; it is only used inside
 * the max_fails > 0 branch.
 */
1593 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1594 struct takeover_callback_data *tcd)
1596 unsigned int max_fails = 0;
1597 uint32_t max_pnn = -1;
1600 for (i = 0; i < tcd->num_nodes; i++) {
1601 if (tcd->fail_count[i] > max_fails) {
1603 max_fails = tcd->fail_count[i];
1607 if (max_fails > 0) {
1612 ("Sending banning credits to %u with fail count %u\n",
1613 max_pnn, max_fails));
1615 data.dptr = (uint8_t *)&max_pnn;
1616 data.dsize = sizeof(uint32_t);
1617 ret = ctdb_client_send_message(ctdb,
1618 CTDB_BROADCAST_CONNECTED,
1623 ("Failed to set banning credits for node %u\n",
1630 * Recalculate the allocation of public IPs to nodes and have the
1631 * nodes host their allocated addresses.
1633 * - Allocate memory for IP allocation state, including per node
1635 * - Populate IP allocation algorithm in IP allocation state
1636 * - Populate local value of tunable NoIPFailback in IP allocation
1637 state - this is really a cluster-wide configuration variable and
1638 only the value from the master node is used
1639 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1640 * connected nodes - this is done separately so tunable values can
1641 * be faked in unit testing
1642 * - Populate NoIPTakeover tunable in IP allocation state
1643 * - Populate NoIPHost in IP allocation state, derived from node flags
1644 * and NoIPHostOnAllDisabled tunable
1645 * - Retrieve and populate known and available IP lists in IP
1647 * - If no available IP addresses then early exit
1648 * - Build list of (known IPs, currently assigned node)
1649 * - Populate list of nodes to force rebalance - internal structure,
1650 * currently no way to fetch, only used by LCP2 for nodes that have
1651 * had new IP addresses added
1652 * - Run IP allocation algorithm
1653 * - Send RELEASE_IP to all nodes for IPs they should not host
1654 * - Send TAKE_IP to all nodes for IPs they should host
1655 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Entry point for a takeover run.
 *
 * ctdb:                  daemon context; also the talloc parent of the
 *                        temporary context used here.
 * nodemap:               node map used to decide which nodes to talk to.
 * force_rebalance_nodes: stored unchanged into the IP allocation state
 *                        before ipalloc() is called.
 *
 * All temporary state (fail-callback data, IP allocation state, async
 * bookkeeping, node list) is allocated below tmp_ctx, which is freed on
 * the error paths shown and at the end of the function.
 *
 * The work is done in three async phases, all using
 * takeover_run_fail_callback as failure callback:
 *   1. RELEASE_IP to every node that is not DISCONNECTED, for each IP
 *      that node is not assigned to host;
 *   2. TAKEOVER_IP to the assigned node of each IP that has one;
 *   3. IPREALLOCATED to the nodes returned by list_of_connected_nodes().
 * takeover_run_process_failures() is invoked with the collected
 * failure counts before the final cleanup.
 *
 * When tunable disable_ip_failover is non-zero, or when no node
 * reports any available public IP, the allocation is skipped (see the
 * inline comments).
 *
 * NOTE(review): tmp_ctx comes from talloc_new() and is used without a
 * visible NULL check - confirm.
 * NOTE(review): tmp_ip->pnn is compared against -1; confirm the field
 * type makes this comparison behave as intended.
 */
1657 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1658 uint32_t *force_rebalance_nodes)
1661 struct ctdb_public_ip ip;
1663 struct public_ip_list *all_ips, *tmp_ip;
1665 struct timeval timeout;
1666 struct client_async_data *async_data;
1667 struct ctdb_client_control_state *state;
1668 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1669 struct ipalloc_state *ipalloc_state;
1670 struct takeover_callback_data *takeover_data;
1673 /* Initialise fail callback data to be used with
1674 * takeover_run_fail_callback(). A failure in any of the
1675 * following steps will cause an early return, so this can be
1676 * reused for each of those steps without re-initialising. */
1677 takeover_data = takeover_callback_data_init(tmp_ctx,
1679 if (takeover_data == NULL) {
1680 talloc_free(tmp_ctx);
1685 * ip failover is completely disabled, just send out the
1686 * ipreallocated event.
1688 if (ctdb->tunable.disable_ip_failover != 0) {
1692 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1693 if (ipalloc_state == NULL) {
1694 talloc_free(tmp_ctx);
1698 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1699 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1700 talloc_free(tmp_ctx);
1704 /* Fetch known/available public IPs from each active node */
1705 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1707 talloc_free(tmp_ctx);
1711 /* Short-circuit IP allocation if no node has available IPs */
1712 can_host_ips = false;
1713 for (i=0; i < ipalloc_state->num; i++) {
1714 if (ipalloc_state->available_public_ips[i].num != 0) {
1715 can_host_ips = true;
1718 if (!can_host_ips) {
1719 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1723 /* since nodes only know about those public addresses that
1724 can be served by that particular node, no single node has
1725 a full list of all public addresses that exist in the cluster.
1726 Walk over all node structures and create a merged list of
1727 all public addresses that exist in the cluster.
1729 keep the tree of ips around as ctdb->ip_tree
1731 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1732 ipalloc_state->all_ips = all_ips;
1734 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1736 /* Do the IP reassignment calculations */
1737 ipalloc(ipalloc_state);
1739 /* Now tell all nodes to release any public IPs should not
1740 * host. This will be a NOOP on nodes that don't currently
1741 * hold the given IP.
1743 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1744 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1746 async_data->fail_callback = takeover_run_fail_callback;
1747 async_data->callback_data = takeover_data;
1749 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1751 /* Send a RELEASE_IP to all nodes that should not be hosting
1752 * each IP. For each IP, all but one of these will be
1753 * redundant. However, the redundant ones are used to tell
1754 * nodes which node should be hosting the IP so that commands
1755 * like "ctdb ip" can display a particular nodes idea of who
1756 * is hosting what. */
1757 for (i=0;i<nodemap->num;i++) {
1758 /* don't talk to unconnected nodes, but do talk to banned nodes */
1759 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1763 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1764 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1765 /* This node should be serving this
1766 vnn so don't tell it to release the ip
1770 ip.pnn = tmp_ip->pnn;
1771 ip.addr = tmp_ip->addr;
1773 timeout = TAKEOVER_TIMEOUT();
1774 data.dsize = sizeof(ip);
1775 data.dptr = (uint8_t *)&ip;
1776 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1777 0, CTDB_CONTROL_RELEASE_IP, 0,
1780 if (state == NULL) {
1781 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1782 talloc_free(tmp_ctx);
1786 ctdb_client_async_add(async_data, state);
1789 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1791 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1794 talloc_free(async_data);
1797 /* For each IP, send a TAKOVER_IP to the node that should be
1798 * hosting it. Many of these will often be redundant (since
1799 * the allocation won't have changed) but they can be useful
1800 * to recover from inconsistencies. */
1801 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1802 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1804 async_data->fail_callback = takeover_run_fail_callback;
1805 async_data->callback_data = takeover_data;
1807 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1808 if (tmp_ip->pnn == -1) {
1809 /* this IP won't be taken over */
1813 ip.pnn = tmp_ip->pnn;
1814 ip.addr = tmp_ip->addr;
1816 timeout = TAKEOVER_TIMEOUT();
1817 data.dsize = sizeof(ip);
1818 data.dptr = (uint8_t *)&ip;
1819 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1820 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1821 data, async_data, &timeout, NULL);
1822 if (state == NULL) {
1823 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1824 talloc_free(tmp_ctx);
1828 ctdb_client_async_add(async_data, state);
1830 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1832 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1838 * Tell all nodes to run eventscripts to process the
1839 * "ipreallocated" event. This can do a lot of things,
1840 * including restarting services to reconfigure them if public
1841 * IPs have moved. Once upon a time this event only used to
1844 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1845 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1846 nodes, 0, TAKEOVER_TIMEOUT(),
1848 NULL, takeover_run_fail_callback,
1852 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1856 talloc_free(tmp_ctx);
1860 takeover_run_process_failures(ctdb, takeover_data);
1861 talloc_free(tmp_ctx);
1867 destroy a ctdb_client_ip structure
/*
 * talloc destructor for a struct ctdb_client_ip (installed by
 * ctdb_control_tcp_client()).  Logs the address, port and client id at
 * debug level and unlinks the entry from ip->ctdb->client_ip_list.
 */
1869 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1871 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1872 ctdb_addr_to_str(&ip->addr),
1873 ntohs(ip->addr.ip.sin_port),
1876 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1881 called by a client to inform us of a TCP connection that it is managing
1882 that should be tickled with an ACK when IP takeover is done
/*
 * Control handler: a local client registers a TCP connection.
 *
 * ctdb:      daemon context.
 * client_id: id used to look up the struct ctdb_client via
 *            reqid_find().
 * indata:    indata.dptr is interpreted as a struct ctdb_connection.
 *
 * Steps visible here:
 *  - nothing to do when this node has no public IPs (ctdb->vnn NULL);
 *  - both endpoints of the connection are canonicalised in place with
 *    ctdb_canonicalize_ip();
 *  - the destination address is looked up with find_public_ip_vnn();
 *    the first switch on the address family logs when it is not a
 *    public address (IPv4 loopback destinations are not logged);
 *  - the request is rejected with an error log when the address's vnn
 *    is not hosted on this node (vnn->pnn != ctdb->pnn);
 *  - a ctdb_client_ip (with ctdb_client_ip_destructor) and a
 *    ctdb_tcp_list entry are allocated as talloc children of the
 *    client and linked into ctdb->client_ip_list and client->tcp_list;
 *  - the connection is broadcast to all connected nodes with
 *    CTDB_CONTROL_TCP_ADD and CTDB_CTRL_FLAG_NOREPLY.
 *
 * NOTE(review): the result of reqid_find() is dereferenced
 * (client->pid, client->tcp_list) with no NULL check visible here -
 * confirm an unknown client_id cannot reach this point.
 * NOTE(review): no check of indata.dsize is visible before indata.dptr
 * is read as a struct ctdb_connection - confirm.
 */
1884 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1887 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1888 struct ctdb_connection *tcp_sock = NULL;
1889 struct ctdb_tcp_list *tcp;
1890 struct ctdb_connection t;
1893 struct ctdb_client_ip *ip;
1894 struct ctdb_vnn *vnn;
1895 ctdb_sock_addr addr;
1897 /* If we don't have public IPs, tickles are useless */
1898 if (ctdb->vnn == NULL) {
1902 tcp_sock = (struct ctdb_connection *)indata.dptr;
1904 addr = tcp_sock->src;
1905 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1906 addr = tcp_sock->dst;
1907 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1910 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1911 vnn = find_public_ip_vnn(ctdb, &addr);
1913 switch (addr.sa.sa_family) {
1915 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1916 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1917 ctdb_addr_to_str(&addr)));
1921 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1922 ctdb_addr_to_str(&addr)));
1925 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1931 if (vnn->pnn != ctdb->pnn) {
1932 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1933 ctdb_addr_to_str(&addr),
1934 client_id, client->pid));
1935 /* failing this call will tell smbd to die */
1939 ip = talloc(client, struct ctdb_client_ip);
1940 CTDB_NO_MEMORY(ctdb, ip);
1944 ip->client_id = client_id;
1945 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1946 DLIST_ADD(ctdb->client_ip_list, ip);
1948 tcp = talloc(client, struct ctdb_tcp_list);
1949 CTDB_NO_MEMORY(ctdb, tcp);
1951 tcp->connection.src = tcp_sock->src;
1952 tcp->connection.dst = tcp_sock->dst;
1954 DLIST_ADD(client->tcp_list, tcp);
1956 t.src = tcp_sock->src;
1957 t.dst = tcp_sock->dst;
1959 data.dptr = (uint8_t *)&t;
1960 data.dsize = sizeof(t);
1962 switch (addr.sa.sa_family) {
1964 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1965 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1966 ctdb_addr_to_str(&tcp_sock->src),
1967 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1970 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1971 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1972 ctdb_addr_to_str(&tcp_sock->src),
1973 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1976 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1980 /* tell all nodes about this tcp connection */
1981 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1982 CTDB_CONTROL_TCP_ADD,
1983 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1985 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1993 find a tcp address on a list
/*
 * Linear search of a tickle array for a connection.
 *
 * array: array to search; a NULL array is handled by an early branch.
 * tcp:   connection to look for.
 *
 * An entry matches when both its src and its dst compare equal to
 * those of tcp according to ctdb_same_sockaddr().  On a match the
 * returned pointer refers to the element inside array->connections,
 * not to a copy.
 */
1995 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1996 struct ctdb_connection *tcp)
2000 if (array == NULL) {
2004 for (i=0;i<array->num;i++) {
2005 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2006 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2007 return &array->connections[i];
2016 called by a daemon to inform us of a TCP connection that one of its
2017 clients is managing that should be tickled with an ACK when IP takeover is
/*
 * Control handler: record a TCP connection in the tickle array of the
 * public address it is connected to.
 *
 * ctdb:              daemon context.
 * indata:            indata.dptr is interpreted as a struct
 *                    ctdb_connection.
 * tcp_update_needed: when true, vnn->tcp_update_needed is set after
 *                    the connection has been added.
 *
 * Nothing is done when this node has no public IPs.  The vnn is found
 * from the connection's destination address; an address that is not a
 * public address is only logged at info level.
 *
 * If the vnn has no tickle array yet, one is allocated as a talloc
 * child of the vnn with room for a single connection.  Otherwise the
 * array is searched with ctdb_tcp_find(); a connection that is already
 * present is only logged, a new one is appended after growing the
 * array with talloc_realloc().
 *
 * NOTE(review): the talloc_realloc() result is assigned straight back
 * to tcparray->connections before the NULL check; if the reallocation
 * fails the old pointer appears to be lost while the element count is
 * unchanged - confirm.
 * NOTE(review): no check of indata.dsize is visible before indata.dptr
 * is read as a struct ctdb_connection - confirm.
 */
2020 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2022 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2023 struct ctdb_tcp_array *tcparray;
2024 struct ctdb_connection tcp;
2025 struct ctdb_vnn *vnn;
2027 /* If we don't have public IPs, tickles are useless */
2028 if (ctdb->vnn == NULL) {
2032 vnn = find_public_ip_vnn(ctdb, &p->dst);
2034 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2035 ctdb_addr_to_str(&p->dst)));
2041 tcparray = vnn->tcp_array;
2043 /* If this is the first tickle */
2044 if (tcparray == NULL) {
2045 tcparray = talloc(vnn, struct ctdb_tcp_array);
2046 CTDB_NO_MEMORY(ctdb, tcparray);
2047 vnn->tcp_array = tcparray;
2050 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2051 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2053 tcparray->connections[tcparray->num].src = p->src;
2054 tcparray->connections[tcparray->num].dst = p->dst;
2057 if (tcp_update_needed) {
2058 vnn->tcp_update_needed = true;
2064 /* Do we already have this tickle ?*/
2067 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2068 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2069 ctdb_addr_to_str(&tcp.dst),
2070 ntohs(tcp.dst.ip.sin_port),
2075 /* A new tickle, we must add it to the array */
2076 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2077 struct ctdb_connection,
2079 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2081 tcparray->connections[tcparray->num].src = p->src;
2082 tcparray->connections[tcparray->num].dst = p->dst;
2085 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2086 ctdb_addr_to_str(&tcp.dst),
2087 ntohs(tcp.dst.ip.sin_port),
2090 if (tcp_update_needed) {
2091 vnn->tcp_update_needed = true;
/*
 * Remove one connection from the tickle array of a vnn.
 *
 * vnn:  public address whose tcp_array is modified.
 * conn: connection to remove; matched with ctdb_tcp_find().
 *
 * A missing array or an unknown connection is only logged at info
 * level.  Removal overwrites the matching slot with the last element
 * and decrements the count, so the order of the remaining entries is
 * not preserved.  When the count reaches zero the whole array is freed
 * and vnn->tcp_array is reset to NULL.  vnn->tcp_update_needed is set
 * after a removal.
 */
2098 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2100 struct ctdb_connection *tcpp;
2106 /* if the array is empty we cant remove it
2107 and we don't need to do anything
2109 if (vnn->tcp_array == NULL) {
2110 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2111 ctdb_addr_to_str(&conn->dst),
2112 ntohs(conn->dst.ip.sin_port)));
2117 /* See if we know this connection
2118 if we don't know this connection then we dont need to do anything
2120 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2122 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2123 ctdb_addr_to_str(&conn->dst),
2124 ntohs(conn->dst.ip.sin_port)));
2129 /* We need to remove this entry from the array.
2130 Instead of allocating a new array and copying data to it
2131 we cheat and just copy the last entry in the existing array
2132 to the entry that is to be removed and just shring the
2135 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2136 vnn->tcp_array->num--;
2138 /* If we deleted the last entry we also need to remove the entire array
2140 if (vnn->tcp_array->num == 0) {
2141 talloc_free(vnn->tcp_array);
2142 vnn->tcp_array = NULL;
2145 vnn->tcp_update_needed = true;
2147 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2148 ctdb_addr_to_str(&conn->src),
2149 ntohs(conn->src.ip.sin_port)));
2154 called by a daemon to inform us of a TCP connection that one of its
2155 clients used is no longer needed in the tickle database
/*
 * Control handler: drop a TCP connection from the tickle array.
 *
 * indata.dptr is interpreted as a struct ctdb_connection.  Nothing is
 * done when this node has no public IPs.  The vnn is looked up from
 * the connection's destination address; if it is not a public address
 * this is logged, otherwise the connection is handed to
 * ctdb_remove_connection().
 *
 * NOTE(review): no check of indata.dsize is visible here - confirm.
 */
2157 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2159 struct ctdb_vnn *vnn;
2160 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2162 /* If we don't have public IPs, tickles are useless */
2163 if (ctdb->vnn == NULL) {
2167 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2170 (__location__ " unable to find public address %s\n",
2171 ctdb_addr_to_str(&conn->dst)));
2175 ctdb_remove_connection(vnn, conn);
2182 Called when another daemon starts - causes all tickles for all
2183 public addresses we are serving to be sent to the new node on the
2184 next check. This actually causes the next scheduled call to
2185 tdb_update_tcp_tickles() to update all nodes. This is simple and
2186 doesn't require careful error handling.
/*
 * Control handler for the startup notification of another node.
 *
 * pnn is only used for the info-level log message.  Every vnn in
 * ctdb->vnn gets tcp_update_needed set, regardless of which node
 * currently hosts the address; ctdb_update_tcp_tickles() later sends
 * updates only for addresses hosted on this node.
 */
2188 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2190 struct ctdb_vnn *vnn;
2192 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2193 (unsigned long) pnn));
2195 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2196 vnn->tcp_update_needed = true;
2204 called when a client structure goes away - hook to remove
2205 elements from the tcp_list in all daemons
/*
 * Hook run when a client structure goes away.
 *
 * Drains client->tcp_list: each entry is unlinked from the list and
 * its connection's public address is looked up with
 * find_public_ip_vnn().  An address that cannot be found is logged.
 * The connection is removed from the vnn's tickle array (via
 * ctdb_remove_connection()) only when the address is hosted on this
 * node; see the inline comment for why it is kept otherwise.
 *
 * The list entries themselves are not freed in the visible lines;
 * they were allocated as talloc children of the client by
 * ctdb_control_tcp_client().
 */
2207 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2209 while (client->tcp_list) {
2210 struct ctdb_vnn *vnn;
2211 struct ctdb_tcp_list *tcp = client->tcp_list;
2212 struct ctdb_connection *conn = &tcp->connection;
2214 DLIST_REMOVE(client->tcp_list, tcp);
2216 vnn = find_public_ip_vnn(client->ctdb,
2220 (__location__ " unable to find public address %s\n",
2221 ctdb_addr_to_str(&conn->dst)));
2225 /* If the IP address is hosted on this node then
2226 * remove the connection. */
2227 if (vnn->pnn == client->ctdb->pnn) {
2228 ctdb_remove_connection(vnn, conn);
2231 /* Otherwise this function has been called because the
2232 * server IP address has been released to another node
2233 * and the client has exited. This means that we
2234 * should not delete the connection information. The
2235 * takeover node processes connections too. */
/*
 * Release every public IP address currently held by this node.
 *
 * The tunable disable_ip_failover == 1 is checked first (presumably an
 * early return - confirm).  Then, for each vnn:
 *  - if ctdb_sys_have_ip() says the address is not on this system, the
 *    interface assignment is dropped with ctdb_vnn_unassign_iface();
 *  - if an update is already in flight for the vnn, a warning is
 *    logged instead of releasing;
 *  - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP
 *    event script is run with "<iface> <address> <netmask bits>", a
 *    CTDB_SRVID_RELEASE_IP message carrying the address string
 *    (including its terminating NUL) is sent to this node's own PNN,
 *    the interface assignment is dropped and update_in_flight is
 *    cleared again.
 *
 * The temporary address string is a talloc child of the vnn and is
 * freed right after the message has been sent; if its allocation fails
 * the message is simply not sent.  The number of released addresses is
 * logged at the end.
 */
2240 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2242 struct ctdb_vnn *vnn;
2246 if (ctdb->tunable.disable_ip_failover == 1) {
2250 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2251 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2252 ctdb_vnn_unassign_iface(ctdb, vnn);
2259 /* Don't allow multiple releases at once. Some code,
2260 * particularly ctdb_tickle_sentenced_connections() is
2262 if (vnn->update_in_flight) {
2263 DEBUG(DEBUG_WARNING,
2265 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2266 ctdb_addr_to_str(&vnn->public_address),
2267 vnn->public_netmask_bits,
2268 ctdb_vnn_iface_string(vnn)));
2271 vnn->update_in_flight = true;
2273 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2274 ctdb_addr_to_str(&vnn->public_address),
2275 vnn->public_netmask_bits,
2276 ctdb_vnn_iface_string(vnn)));
2278 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2279 ctdb_vnn_iface_string(vnn),
2280 ctdb_addr_to_str(&vnn->public_address),
2281 vnn->public_netmask_bits);
2283 data.dptr = (uint8_t *)talloc_strdup(
2284 vnn, ctdb_addr_to_str(&vnn->public_address));
2285 if (data.dptr != NULL) {
2286 data.dsize = strlen((char *)data.dptr) + 1;
2287 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2288 CTDB_SRVID_RELEASE_IP, data);
2289 talloc_free(data.dptr);
2292 ctdb_vnn_unassign_iface(ctdb, vnn);
2293 vnn->update_in_flight = false;
2297 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2302 get list of public IPs
/*
 * Control handler: return this node's list of public IP addresses.
 *
 * c:       request; the flag CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE in
 *          c->flags restricts the result to vnns for which
 *          ctdb_vnn_available() is true.
 * outdata: receives a struct ctdb_public_ip_list_old allocated as a
 *          talloc child of outdata.
 *
 * The buffer is zero-allocated for the full number of vnns counted in
 * the first loop.  The second loop fills in pnn and addr for each
 * entry that passes the availability filter, and outdata->dsize is
 * then recomputed from the number of entries actually written, so it
 * can be smaller than the allocation.
 */
2304 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2305 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2308 struct ctdb_public_ip_list_old *ips;
2309 struct ctdb_vnn *vnn;
2310 bool only_available = false;
2312 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2313 only_available = true;
2316 /* count how many public ip structures we have */
2318 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2322 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2323 num*sizeof(struct ctdb_public_ip);
2324 ips = talloc_zero_size(outdata, len);
2325 CTDB_NO_MEMORY(ctdb, ips);
2328 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2329 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2332 ips->ips[i].pnn = vnn->pnn;
2333 ips->ips[i].addr = vnn->public_address;
2337 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2338 i*sizeof(struct ctdb_public_ip);
2340 outdata->dsize = len;
2341 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: return details about one public IP address.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr.  The matching vnn is
 * found with find_public_ip_vnn(); an address that is not a public
 * address is logged as an error.
 *
 * The reply is a struct ctdb_public_ip_info_old allocated as a talloc
 * child of outdata, with one ifaces[] entry per name in vnn->ifaces
 * (the loops stop at the first NULL entry of that array).  ip.addr and
 * ip.pnn are copied from the vnn.  active_idx starts as 0xFFFFFFFF and
 * is set to the index of the interface that equals vnn->iface, so it
 * keeps that initial value when no listed interface matches.
 *
 * Each interface name is resolved with ctdb_find_iface(); an unknown
 * name is logged as an internal error.  Names are copied with strncpy
 * and explicitly NUL-terminated; link_state and references come from
 * the resolved interface.
 */
2347 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2348 struct ctdb_req_control_old *c,
2353 ctdb_sock_addr *addr;
2354 struct ctdb_public_ip_info_old *info;
2355 struct ctdb_vnn *vnn;
2357 addr = (ctdb_sock_addr *)indata.dptr;
2359 vnn = find_public_ip_vnn(ctdb, addr);
2361 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2362 "'%s'not a public address\n",
2363 ctdb_addr_to_str(addr)));
2367 /* count how many public ip structures we have */
2369 for (;vnn->ifaces[num];) {
2373 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2374 num*sizeof(struct ctdb_iface);
2375 info = talloc_zero_size(outdata, len);
2376 CTDB_NO_MEMORY(ctdb, info);
2378 info->ip.addr = vnn->public_address;
2379 info->ip.pnn = vnn->pnn;
2380 info->active_idx = 0xFFFFFFFF;
2382 for (i=0; vnn->ifaces[i]; i++) {
2383 struct ctdb_interface *cur;
2385 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2387 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2391 if (vnn->iface == cur) {
2392 info->active_idx = i;
2394 strncpy(info->ifaces[i].name, cur->name,
2395 sizeof(info->ifaces[i].name));
2396 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2397 info->ifaces[i].link_state = cur->link_up;
2398 info->ifaces[i].references = cur->references;
2401 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2402 i*sizeof(struct ctdb_iface);
2404 outdata->dsize = len;
2405 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return the list of interfaces known to this node.
 *
 * The reply is a struct ctdb_iface_list_old allocated zeroed as a
 * talloc child of outdata, sized for the number of entries counted in
 * ctdb->ifaces.  For every interface the name (strncpy plus explicit
 * NUL terminator), link_state (from link_up) and references are
 * copied.  outdata->dsize is recomputed from the number of entries
 * written.
 */
2410 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2411 struct ctdb_req_control_old *c,
2415 struct ctdb_iface_list_old *ifaces;
2416 struct ctdb_interface *cur;
2418 /* count how many public ip structures we have */
2420 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2424 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2425 num*sizeof(struct ctdb_iface);
2426 ifaces = talloc_zero_size(outdata, len);
2427 CTDB_NO_MEMORY(ctdb, ifaces);
2430 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2431 strncpy(ifaces->ifaces[i].name, cur->name,
2432 sizeof(ifaces->ifaces[i].name));
2433 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2434 ifaces->ifaces[i].link_state = cur->link_up;
2435 ifaces->ifaces[i].references = cur->references;
2439 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2440 i*sizeof(struct ctdb_iface);
2442 outdata->dsize = len;
2443 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 *
 * indata.dptr is interpreted as a struct ctdb_iface.  The request is
 * validated before use:
 *  - info->name must have a NUL byte at index CTDB_IFACE_SIZE,
 *    otherwise the (length-limited) name is logged as not terminated;
 *  - info->link_state is checked in a switch and an unexpected value
 *    is logged as invalid;
 *  - info->references must be 0.
 *
 * The interface is then looked up with ctdb_find_iface().  The checks
 * for an unknown interface and for an unchanged link state are
 * visible, but what they return is not - confirm.  A change is logged
 * at DEBUG_ERR when the interface was up before (i.e. it is going
 * down) and at DEBUG_NOTICE otherwise, and iface->link_up is updated.
 *
 * NOTE(review): no check of indata.dsize is visible here - confirm.
 */
2448 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2449 struct ctdb_req_control_old *c,
2452 struct ctdb_iface *info;
2453 struct ctdb_interface *iface;
2454 bool link_up = false;
2456 info = (struct ctdb_iface *)indata.dptr;
2458 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2459 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2460 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2461 len, len, info->name));
2465 switch (info->link_state) {
2473 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2474 (unsigned int)info->link_state));
2478 if (info->references != 0) {
2479 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2480 (unsigned int)info->references));
2484 iface = ctdb_find_iface(ctdb, info->name);
2485 if (iface == NULL) {
2489 if (link_up == iface->link_up) {
2493 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2494 ("iface[%s] has changed it's link status %s => %s\n",
2496 iface->link_up?"up":"down",
2497 link_up?"up":"down"));
2499 iface->link_up = link_up;
2505 called by a daemon to inform us of the entire list of TCP tickles for
2506 a particular public address.
2507 this control should only be sent by the node that is currently serving
2508 that public address.
/*
 * Control handler: replace the tickle list of a public address with
 * the list received in indata.
 *
 * indata.dptr is interpreted as a struct ctdb_tickle_list_old.  Two
 * size checks are made before the payload is trusted: the blob must
 * cover the fixed part up to the connections[] member, and it must
 * cover list->num connections on top of that.
 *
 * The vnn is found from list->addr; an address that is not a public
 * address is only logged.  If this node itself hosts the address the
 * update is treated as redundant and ignored.  Otherwise any existing
 * vnn->tcp_array is freed and a new array (talloc child of the vnn)
 * holding a copy of the received connections is installed.
 */
2510 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2512 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2513 struct ctdb_tcp_array *tcparray;
2514 struct ctdb_vnn *vnn;
2516 /* We must at least have tickles.num or else we cant verify the size
2517 of the received data blob
2519 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2520 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2524 /* verify that the size of data matches what we expect */
2525 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2526 + sizeof(struct ctdb_connection) * list->num) {
2527 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2531 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2532 ctdb_addr_to_str(&list->addr)));
2534 vnn = find_public_ip_vnn(ctdb, &list->addr);
2536 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2537 ctdb_addr_to_str(&list->addr)));
2542 if (vnn->pnn == ctdb->pnn) {
2544 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2545 ctdb_addr_to_str(&list->addr)));
2549 /* remove any old ticklelist we might have */
2550 talloc_free(vnn->tcp_array);
2551 vnn->tcp_array = NULL;
2553 tcparray = talloc(vnn, struct ctdb_tcp_array);
2554 CTDB_NO_MEMORY(ctdb, tcparray);
2556 tcparray->num = list->num;
2558 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2559 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2561 memcpy(tcparray->connections, &list->connections[0],
2562 sizeof(struct ctdb_connection)*tcparray->num);
2564 /* We now have a new fresh tickle list array for this vnn */
2565 vnn->tcp_array = tcparray;
2571 called to return the full list of tickles for the public address associated
2572 with the provided vnn
/*
 * Control handler: return the tickle list of a public address.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr.  The vnn is found
 * with find_public_ip_vnn(); an address that is not a public address
 * is logged as an error.
 *
 * The port of the requested address (ctdb_addr_to_port()) acts as a
 * filter: per the inline comments the result holds either all
 * connections of the vnn or only those whose destination port equals
 * that port (the condition selecting "all" is presumably port == 0 -
 * confirm).  The connections are counted first so the reply can be
 * sized exactly, then copied in a second pass using the same port
 * comparison.
 *
 * The reply is a struct ctdb_tickle_list_old allocated with
 * talloc_size() as a talloc child of outdata.
 */
2574 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2576 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2577 struct ctdb_tickle_list_old *list;
2578 struct ctdb_tcp_array *tcparray;
2580 struct ctdb_vnn *vnn;
2583 vnn = find_public_ip_vnn(ctdb, addr);
2585 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2586 ctdb_addr_to_str(addr)));
2591 port = ctdb_addr_to_port(addr);
2593 tcparray = vnn->tcp_array;
2595 if (tcparray != NULL) {
2597 /* All connections */
2598 num = tcparray->num;
2600 /* Count connections for port */
2601 for (i = 0; i < tcparray->num; i++) {
2602 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2609 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2610 + sizeof(struct ctdb_connection) * num;
2612 outdata->dptr = talloc_size(outdata, outdata->dsize);
2613 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2614 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2624 for (i = 0; i < tcparray->num; i++) {
2626 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2627 list->connections[num] = tcparray->connections[i];
2637 set the list of all tcp tickles for a public address
/*
 * Broadcast the tickle list of one public address to all nodes.
 *
 * addr:     public address the list belongs to.
 * tcparray: connections to send; its num field gives the count.
 *
 * A struct ctdb_tickle_list_old large enough for all connections is
 * allocated on ctdb, filled with a copy of the connections and sent
 * with CTDB_CONTROL_SET_TCP_TICKLE_LIST to CTDB_BROADCAST_ALL using
 * CTDB_CTRL_FLAG_NOREPLY.  A send failure is logged.  The temporary
 * buffer is freed after the send.
 *
 * Returns an int; ctdb_update_tcp_tickles() stores the result and logs
 * "Failed to send the tickle update" on its error branch.
 *
 * NOTE(review): talloc_free(data.dptr) appears only after the failure
 * branch of the send; if that branch returns early the buffer would
 * stay allocated on ctdb - confirm.
 */
2639 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2640 ctdb_sock_addr *addr,
2641 struct ctdb_tcp_array *tcparray)
2645 struct ctdb_tickle_list_old *list;
2648 num = tcparray->num;
2653 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2654 sizeof(struct ctdb_connection) * num;
2655 data.dptr = talloc_size(ctdb, data.dsize);
2656 CTDB_NO_MEMORY(ctdb, data.dptr);
2658 list = (struct ctdb_tickle_list_old *)data.dptr;
2662 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2665 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2666 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2667 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2669 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2673 talloc_free(data.dptr);
2680 perform tickle updates if required
/*
 * Timer callback that pushes pending tickle-list updates to the
 * cluster.
 *
 * private_data is the struct ctdb_context.  For every vnn that is
 * hosted on this node (ctdb->pnn == vnn->pnn) and has
 * tcp_update_needed set, the tickle list is sent with
 * ctdb_send_set_tcp_tickles_for_ip().  tcp_update_needed is cleared
 * next to the "Sent tickle update" message; a failed send is logged.
 *
 * The callback re-arms itself on ctdb->tickle_update_context with the
 * tunable tickle_update_interval (seconds) as delay.
 */
2682 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2683 struct tevent_timer *te,
2684 struct timeval t, void *private_data)
2686 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2688 struct ctdb_vnn *vnn;
2690 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2691 /* we only send out updates for public addresses that
2694 if (ctdb->pnn != vnn->pnn) {
2697 /* We only send out the updates if we need to */
2698 if (!vnn->tcp_update_needed) {
2701 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2702 &vnn->public_address,
2705 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2706 ctdb_addr_to_str(&vnn->public_address)));
2709 ("Sent tickle update for public address %s\n",
2710 ctdb_addr_to_str(&vnn->public_address)));
2711 vnn->tcp_update_needed = false;
2715 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2716 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2717 ctdb_update_tcp_tickles, ctdb);
2721 start periodic update of tcp tickles
/*
 * Start the periodic tickle updates: create
 * ctdb->tickle_update_context as a talloc child of ctdb and schedule
 * the first run of ctdb_update_tcp_tickles() after the tunable
 * tickle_update_interval (seconds).
 *
 * NOTE(review): the result of talloc_new() is not checked in the
 * visible lines - confirm.
 */
2723 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2725 ctdb->tickle_update_context = talloc_new(ctdb);
2727 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2728 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2729 ctdb_update_tcp_tickles, ctdb);
/*
 * State for a series of gratuitous ARPs requested by a control.
 * ctdb is the daemon context (its event context is used for the
 * timers) and addr is the address being announced.  send_gratious_arp()
 * also reads the iface and count members of this structure.
 */
2735 struct control_gratious_arp {
2736 struct ctdb_context *ctdb;
2737 ctdb_sock_addr addr;
2743 send a control_gratuitous arp
/*
 * Timer callback that sends one gratuitous ARP and schedules the next.
 *
 * private_data is a struct control_gratious_arp.  The ARP is sent with
 * ctdb_sys_send_arp() for arp->addr on arp->iface; a failure is logged
 * together with strerror(errno).  arp->count is compared against
 * CTDB_ARP_REPEAT to decide when the series is finished; until then
 * the callback re-arms itself after CTDB_ARP_INTERVAL seconds.  The
 * timer is allocated on arp itself.
 */
2745 static void send_gratious_arp(struct tevent_context *ev,
2746 struct tevent_timer *te,
2747 struct timeval t, void *private_data)
2750 struct control_gratious_arp *arp = talloc_get_type(private_data,
2751 struct control_gratious_arp);
2753 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2755 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2756 arp->iface, strerror(errno)));
2761 if (arp->count == CTDB_ARP_REPEAT) {
2766 tevent_add_timer(arp->ctdb->ev, arp,
2767 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2768 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs for the address
 * and interface carried in indata.
 *
 * ctdb   - daemon context; parent of the allocated state
 * indata - interpreted as a struct ctdb_addr_info_old whose iface
 *          member is followed by gratious_arp->len bytes
 *
 * The return statements are not visible in this copy of the file.
 *
 * NOTE(review): talloc_strdup() below reads gratious_arp->iface as a
 * NUL-terminated string, while the visible checks only compare sizes.
 * Confirm the payload is guaranteed to be terminated within len bytes.
 */
2775 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2777 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2778 struct control_gratious_arp *arp;
2780 /* verify the size of indata */
2781 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2782 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2783 (unsigned)indata.dsize,
2784 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
/*
 * Second size check: fixed header plus len bytes of interface name.
 * The comparison line itself is not visible here; going by the message
 * it rejects any other size.
 */
2788 ( offsetof(struct ctdb_addr_info_old, iface)
2789 + gratious_arp->len ) ){
2791 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2792 "but should be %u bytes\n",
2793 (unsigned)indata.dsize,
2794 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
/* Per-series state, allocated as a child of ctdb. */
2799 arp = talloc(ctdb, struct control_gratious_arp);
2800 CTDB_NO_MEMORY(ctdb, arp);
2803 arp->addr = gratious_arp->addr;
/* Private copy of the interface name, owned by arp. */
2804 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2805 CTDB_NO_MEMORY(ctdb, arp->iface);
/* timeval_zero() is passed as the time for the first send_gratious_arp() run. */
2808 tevent_add_timer(arp->ctdb->ev, arp,
2809 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add the public address described by indata via
 * ctdb_add_public_address().
 *
 * ctdb   - daemon context
 * indata - interpreted as a struct ctdb_addr_info_old (addr, mask, and
 *          an interface string starting at iface[0])
 *
 * The declaration of "ret", the second size comparison and the return
 * statements are not visible in this copy of the file.
 */
2814 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2816 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2819 /* verify the size of indata */
2820 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2821 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
/* Second check: going by the message, the size must equal the fixed header plus pub->len. */
2825 ( offsetof(struct ctdb_addr_info_old, iface)
2828 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2829 "but should be %u bytes\n",
2830 (unsigned)indata.dsize,
2831 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2835 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
/* The meaning of the trailing "true" argument is defined by ctdb_add_public_address(), outside this view. */
2837 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2840 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * State carried from ctdb_control_del_public_address() to
 * delete_ip_callback() while the release of the address is in flight.
 */
2847 struct delete_ip_callback_state {
/* the original control request, answered from delete_ip_callback() */
2848 struct ctdb_req_control_old *c;
2852 called when releaseip event finishes for del_public_address
/*
 * Completion callback for the release started by
 * ctdb_control_del_public_address(): forward the outcome to the
 * original requester and free the callback state.
 *
 * status, errormsg - passed through unchanged to the reply
 * data             - not referenced in the lines visible here
 * private_data     - the struct delete_ip_callback_state (this
 *                    parameter's declaration line is not visible in
 *                    this copy)
 */
2854 static void delete_ip_callback(struct ctdb_context *ctdb,
2855 int32_t status, TDB_DATA data,
2856 const char *errormsg,
2859 struct delete_ip_callback_state *state =
2860 talloc_get_type(private_data, struct delete_ip_callback_state);
2862 /* If release failed then fail. */
2863 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
/* Frees the state; the stolen request state->c hangs off it. */
2864 talloc_free(private_data);
/*
 * Control handler: delete the public address named in indata from
 * this node.
 *
 * ctdb        - daemon context
 * c           - the control request; stolen onto the callback state
 *               when the reply is deferred
 * indata      - interpreted as a struct ctdb_addr_info_old
 * async_reply - set to true when the reply will be sent later
 *
 * If the address is hosted on this node (vnn->pnn == ctdb->pnn) the
 * vnn is marked delete_pending and a CTDB_CONTROL_RELEASE_IP is sent;
 * presumably delete_ip_callback() completes the request, but the
 * argument lines of that call are not visible here.  Otherwise the
 * address is removed directly with do_delete_ip().  An address that
 * matches no vnn is logged as unknown.
 *
 * NOTE(review): short lines (declarations of "data" and "ret", the
 * error checks, return statements, braces) are missing from this copy
 * of the file and are left untouched.
 */
2867 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2868 struct ctdb_req_control_old *c,
2869 TDB_DATA indata, bool *async_reply)
2871 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2872 struct ctdb_vnn *vnn;
2874 /* verify the size of indata */
2875 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2876 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2880 ( offsetof(struct ctdb_addr_info_old, iface)
2883 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2884 "but should be %u bytes\n",
2885 (unsigned)indata.dsize,
2886 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2890 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2892 /* walk over all public addresses until we find a match */
2893 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2894 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2895 if (vnn->pnn == ctdb->pnn) {
2896 struct delete_ip_callback_state *state;
2897 struct ctdb_public_ip *ip;
/* Mark the vnn before starting the release of the address. */
2901 vnn->delete_pending = true;
2903 state = talloc(ctdb,
2904 struct delete_ip_callback_state);
2905 CTDB_NO_MEMORY(ctdb, state);
/* The control payload is a child of state, so it is freed together with it. */
2908 ip = talloc(state, struct ctdb_public_ip);
2911 (__location__ " Out of memory\n"));
2916 ip->addr = pub->addr;
2918 data.dsize = sizeof(struct ctdb_public_ip);
2919 data.dptr = (unsigned char *)ip;
2921 ret = ctdb_daemon_send_control(ctdb,
2924 CTDB_CONTROL_RELEASE_IP,
/* Leading space keeps the location prefix and the message apart, as in the other messages. */
2931 (__location__ " Unable to send "
2932 "CTDB_CONTROL_RELEASE_IP\n"));
/* Keep the request alive on the state; the reply is sent when the release completes. */
2937 state->c = talloc_steal(state, c);
2938 *async_reply = true;
2940 /* This IP is not hosted on the
2941 * current node so just delete it
2943 do_delete_ip(ctdb, vnn);
2950 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2951 ctdb_addr_to_str(&pub->addr)));
/*
 * State carried from ctdb_control_ipreallocated() to
 * ctdb_ipreallocated_callback().
 */
2956 struct ipreallocated_callback_state {
/* the original control request, answered from the callback */
2957 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event started by
 * ctdb_control_ipreallocated().
 *
 * status - result of the event; passed through to the control reply
 * p      - the struct ipreallocated_callback_state
 *
 * NOTE(review): the condition guarding the failure log, and anything
 * after the reply, are not visible in this copy of the file.
 */
2960 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2961 int status, void *p)
2963 struct ipreallocated_callback_state *state =
2964 talloc_get_type(p, struct ipreallocated_callback_state);
2968 (" \"ipreallocated\" event script failed (status %d)\n",
/* A status of -ETIME makes this node ban itself. */
2970 if (status == -ETIME) {
2971 ctdb_ban_self(ctdb);
/* Answer the deferred control with the event status. */
2975 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2979 /* A control to run the ipreallocated event */
/*
 * Control handler: start the CTDB_EVENT_IPREALLOCATED event and defer
 * the reply until ctdb_ipreallocated_callback() runs.
 *
 * ctdb        - daemon context; parent of the callback state
 * c           - the control request; stolen onto the state once the
 *               event has been started
 * async_reply - set to true so the caller does not reply immediately
 *               (this parameter's declaration line is not visible in
 *               this copy)
 *
 * The declaration of "ret", the check on it and the return statements
 * are not visible here.
 */
2980 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2981 struct ctdb_req_control_old *c,
2985 struct ipreallocated_callback_state *state;
2987 state = talloc(ctdb, struct ipreallocated_callback_state);
2988 CTDB_NO_MEMORY(ctdb, state);
2990 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
/* state is passed twice: once before the callback and once as the callback's private data. */
2992 ret = ctdb_event_script_callback(ctdb, state,
2993 ctdb_ipreallocated_callback, state,
2994 CTDB_EVENT_IPREALLOCATED,
2998 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3003 /* tell the control that we will reply asynchronously */
3004 state->c = talloc_steal(state, c);
3005 *async_reply = true;
/*
 * Record in ctdb->ip_tree that the address ip->addr is now assigned
 * to node ip->pnn.
 *
 * ctdb - daemon context holding ip_tree and the tunables
 * ip   - address and its new node number
 *
 * Nothing is updated when tunable.disable_ip_failover is non-zero,
 * when the tree does not exist yet, or when the address is not in the
 * tree.  The return values for these cases are not visible in this
 * copy of the file.
 */
3011 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3013 struct public_ip_list *tmp_ip;
3015 /* IP tree is never built if DisableIPFailover is set */
3016 if (ctdb->tunable.disable_ip_failover != 0) {
3020 if (ctdb->ip_tree == NULL) {
3021 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
/* The tree is keyed by ip_key() of the address, IP_KEYLEN words long. */
3025 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3026 if (tmp_ip == NULL) {
3027 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* Log old and new owner, then overwrite the owner in place. */
3031 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3032 tmp_ip->pnn = ip->pnn;
/*
 * Discard the IP assignment tree.  update_ip_assignment_tree() treats
 * a NULL ctdb->ip_tree as "not built yet".
 */
3037 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
/* TALLOC_FREE releases the tree and resets the pointer to NULL. */
3039 TALLOC_FREE(ctdb->ip_tree);
/*
 * Book-keeping for one in-flight "reload public IPs" request; the
 * current one is remembered in ctdb->reload_ips.
 *
 * NOTE(review): the code in this file also uses ->status, ->fd[] and
 * ->child; their declarations are not visible in this copy.
 */
3042 struct ctdb_reloadips_handle {
/* daemon context */
3043 struct ctdb_context *ctdb;
/* the control request, answered from the destructor */
3044 struct ctdb_req_control_old *c;
/* fd event watching the read end of the pipe from the child */
3048 struct tevent_fd *fde;
/*
 * talloc destructor for a reloadips handle: drop the daemon's
 * reference to it, answer the pending control with h->status, and
 * kill the child process.
 *
 * The return statement is not visible in this copy of the file.
 */
3051 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
/* Only clear ctdb->reload_ips if it still points at this handle. */
3053 if (h == h->ctdb->reload_ips) {
3054 h->ctdb->reload_ips = NULL;
3057 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
/* SIGKILL is sent to the child on every destruction. */
3060 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer handler queued by ctdb_control_reload_public_ips() for the
 * case where the child does not report back in time.
 *
 * private_data - the struct ctdb_reloadips_handle
 *
 * NOTE(review): the rest of the body is not visible in this copy.
 * Presumably it frees h, which would run ctdb_reloadips_destructor() -
 * confirm against the full source.
 */
3064 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3065 struct tevent_timer *te,
3066 struct timeval t, void *private_data)
3068 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd handler for the read end of the pipe from the reloadips child:
 * read the one-byte result the child wrote.
 *
 * private_data - the struct ctdb_reloadips_handle
 *
 * NOTE(review): the declarations of "res" and "ret" and everything
 * after the error log are not visible in this copy of the file.
 */
3073 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3074 struct tevent_fd *fde,
3075 uint16_t flags, void *private_data)
3077 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3082 ret = sys_read(h->fd[0], &res, 1);
/* A short or failed read, or a non-zero result byte, counts as failure of the child. */
3083 if (ret < 1 || res != 0) {
3084 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Body of the reloadips child process: reconcile the public IPs the
 * local daemon reports with the public addresses file.
 *
 * Steps visible here:
 *  - fetch the daemon's current public IPs (ctdb_ctrl_get_public_ips)
 *  - re-read the addresses file into ctdb->vnn
 *    (ctdb_set_public_addresses)
 *  - queue CTDB_CONTROL_DEL_PUBLIC_IP for reported IPs that are no
 *    longer configured
 *  - queue CTDB_CONTROL_ADD_PUBLIC_IP for configured addresses the
 *    daemon did not report
 *  - wait for all queued controls with ctdb_client_async_wait()
 *
 * The caller writes the result to the parent as one byte, and the
 * parent treats a non-zero byte as failure.
 *
 * NOTE(review): short lines (several declarations, loop breaks,
 * conditions, return statements, braces) are missing from this copy
 * of the file; notes below that depend on them are marked as
 * assumptions.
 */
3092 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
/* Scratch context for everything allocated in this function. */
3094 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3095 struct ctdb_public_ip_list_old *ips;
3096 struct ctdb_vnn *vnn;
3097 struct client_async_data *async_data;
3098 struct timeval timeout;
3100 struct ctdb_client_control_state *state;
3104 CTDB_NO_MEMORY(ctdb, mem_ctx);
3106 /* Read IPs from local node */
3107 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3108 CTDB_CURRENT_NODE, mem_ctx, &ips);
3111 ("Unable to fetch public IPs from local node\n"));
3112 talloc_free(mem_ctx);
3116 /* Read IPs file - this is safe since this is a child process */
3118 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3119 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3120 talloc_free(mem_ctx);
/* Collects the controls sent below so they can be awaited together. */
3124 async_data = talloc_zero(mem_ctx, struct client_async_data);
3125 CTDB_NO_MEMORY(ctdb, async_data);
3127 /* Compare IPs between node and file for IPs to be deleted */
3128 for (i = 0; i < ips->num; i++) {
3130 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3131 if (ctdb_same_ip(&vnn->public_address,
3132 &ips->ips[i].addr)) {
3133 /* IP is still in file */
3139 /* Delete IP ips->ips[i] */
3140 struct ctdb_addr_info_old *pub;
3143 ("IP %s no longer configured, deleting it\n",
3144 ctdb_addr_to_str(&ips->ips[i].addr)));
/* Zeroed request; only addr is filled in among the visible lines. */
3146 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3147 CTDB_NO_MEMORY(ctdb, pub);
3149 pub->addr = ips->ips[i].addr;
3153 timeout = TAKEOVER_TIMEOUT();
3155 data.dsize = offsetof(struct ctdb_addr_info_old,
3157 data.dptr = (uint8_t *)pub;
3159 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3160 CTDB_CONTROL_DEL_PUBLIC_IP,
3161 0, data, async_data,
3163 if (state == NULL) {
3166 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3170 ctdb_client_async_add(async_data, state);
3174 /* Compare IPs between node and file for IPs to be added */
3176 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3177 for (i = 0; i < ips->num; i++) {
3178 if (ctdb_same_ip(&vnn->public_address,
3179 &ips->ips[i].addr)) {
3180 /* IP already on node */
/* i reaching ips->num means the inner loop found no match for this vnn. */
3184 if (i == ips->num) {
3185 /* Add IP ips->ips[i] */
3186 struct ctdb_addr_info_old *pub;
3187 const char *ifaces = NULL;
3192 ("New IP %s configured, adding it\n",
3193 ctdb_addr_to_str(&vnn->public_address)));
/*
 * Broadcast this node's pnn with CTDB_SRVID_REBALANCE_NODE; a failure
 * here is only a warning.  NOTE(review): the lines around this section
 * are not visible, so whether it is guarded to run once per reload
 * cannot be told from here.
 */
3195 uint32_t pnn = ctdb_get_pnn(ctdb);
3197 data.dsize = sizeof(pnn);
3198 data.dptr = (uint8_t *)&pnn;
3200 ret = ctdb_client_send_message(
3202 CTDB_BROADCAST_CONNECTED,
3203 CTDB_SRVID_REBALANCE_NODE,
3206 DEBUG(DEBUG_WARNING,
3207 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/* Build a comma-separated list of this vnn's interface names. */
3213 ifaces = vnn->ifaces[0];
/*
 * NOTE(review): no NULL check on the talloc_asprintf() result is
 * visible before strlen(ifaces) below - confirm.  The string is
 * allocated on vnn rather than on mem_ctx.
 */
3215 while (vnn->ifaces[iface] != NULL) {
3216 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3217 vnn->ifaces[iface]);
/* len includes the terminating NUL. */
3221 len = strlen(ifaces) + 1;
/* The request is the fixed header plus len bytes for the interface list. */
3222 pub = talloc_zero_size(mem_ctx,
3223 offsetof(struct ctdb_addr_info_old, iface) + len);
3224 CTDB_NO_MEMORY(ctdb, pub);
3226 pub->addr = vnn->public_address;
3227 pub->mask = vnn->public_netmask_bits;
/* pub->len is assigned on a line not visible here; presumably it is set to len. */
3229 memcpy(&pub->iface[0], ifaces, pub->len);
3231 timeout = TAKEOVER_TIMEOUT();
3233 data.dsize = offsetof(struct ctdb_addr_info_old,
3235 data.dptr = (uint8_t *)pub;
3237 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3238 CTDB_CONTROL_ADD_PUBLIC_IP,
3239 0, data, async_data,
3241 if (state == NULL) {
3244 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3248 ctdb_client_async_add(async_data, state);
/* Wait for every queued add/delete control to complete. */
3252 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3253 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
/*
 * Two releases of mem_ctx follow; presumably one per exit path.  The
 * return statements and any label between them are not visible here.
 */
3257 talloc_free(mem_ctx);
3261 talloc_free(mem_ctx);
3265 /* This control is sent to force the node to re-read the public addresses file
3266 and drop any addresses we should no longer host, and add new addresses
3267 that we are now able to host
/*
 * Control handler: fork a child that runs ctdb_reloadips_child() and
 * reply to the control once the child reports back, or when the
 * handle is destroyed.
 *
 * ctdb        - daemon context; parent of the handle
 * c           - the control request; stolen onto the handle and
 *               answered from ctdb_reloadips_destructor()
 * async_reply - set to true because the reply is deferred
 *
 * The child writes a one-byte result to the pipe; the parent watches
 * the read end with ctdb_reloadips_child_handler() and queues
 * ctdb_reloadips_timeout_event() as a timeout.
 *
 * NOTE(review): short lines (initialisation of the handle fields,
 * error-path cleanup, return statements, braces) are missing from
 * this copy of the file and are left untouched.
 */
3269 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3271 struct ctdb_reloadips_handle *h;
/* Remembered so the child can wait for this process to exit. */
3272 pid_t parent = getpid();
/* Freeing a previous handle runs its destructor: that request is answered and its child killed. */
3274 if (ctdb->reload_ips != NULL) {
3275 talloc_free(ctdb->reload_ips);
3276 ctdb->reload_ips = NULL;
3279 h = talloc(ctdb, struct ctdb_reloadips_handle);
3280 CTDB_NO_MEMORY(ctdb, h);
3285 if (pipe(h->fd) == -1) {
/* Message names reloadips, matching the fork failure message below. */
3286 DEBUG(DEBUG_ERR,("Failed to create pipe for reloadips\n"));
3291 h->child = ctdb_fork(ctdb);
3292 if (h->child == (pid_t)-1) {
3293 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* Child side: switch to client mode, do the reload, report one byte, then wait for the parent to exit. */
3301 if (h->child == 0) {
3302 signed char res = 0;
3305 debug_extra = talloc_asprintf(NULL, "reloadips:");
3307 prctl_set_comment("ctdb_reloadips");
3308 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3309 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3312 res = ctdb_reloadips_child(ctdb);
3314 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3318 sys_write(h->fd[1], &res, 1);
3319 ctdb_wait_for_process_to_exit(parent);
/* Parent side: keep the request on the handle so the destructor can answer it. */
3323 h->c = talloc_steal(h, c);
3326 set_close_on_exec(h->fd[0]);
3328 talloc_set_destructor(h, ctdb_reloadips_destructor);
/* Watch the read end of the pipe; auto-close ties the fd to the fd event. */
3331 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3332 ctdb_reloadips_child_handler, (void *)h);
3333 tevent_fd_set_auto_close(h->fde);
/* Hard-coded 120 second timeout for the child to report back. */
3335 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3336 ctdb_reloadips_timeout_event, h);
3338 /* we reply later */
3339 *async_reply = true;