4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 /* state associated with a public ip address */
60 struct ctdb_vnn *prev, *next;
62 struct ctdb_interface *iface;
64 ctdb_sock_addr public_address;
65 uint8_t public_netmask_bits;
67 /* the node number that is serving this public address, if any.
68 If no node serves this ip it is set to -1 */
71 /* List of clients to tickle for this public address */
72 struct ctdb_tcp_array *tcp_array;
74 /* whether we need to update the other nodes with changes to our list
75 of connected clients */
76 bool tcp_update_needed;
78 /* a context to hang sending gratuitous arp events off */
79 TALLOC_CTX *takeover_ctx;
81 /* Set to true any time an update to this VNN is in flight.
82 This helps to avoid races. */
83 bool update_in_flight;
85 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86 * address then this flag is set. It will be deleted in the
87 * release IP callback. */
91 static const char *iface_string(const struct ctdb_interface *iface)
93 return (iface != NULL ? iface->name : "__none__");
96 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
98 return iface_string(vnn->iface);
101 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
103 struct ctdb_interface *i;
105 if (strlen(iface) > CTDB_IFACE_SIZE) {
106 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
110 /* Verify that we don't have an entry for this interface yet */
111 for (i=ctdb->ifaces;i;i=i->next) {
112 if (strcmp(i->name, iface) == 0) {
117 /* create a new structure for this interface */
118 i = talloc_zero(ctdb, struct ctdb_interface);
119 CTDB_NO_MEMORY_FATAL(ctdb, i);
120 i->name = talloc_strdup(i, iface);
121 CTDB_NO_MEMORY(ctdb, i->name);
125 DLIST_ADD(ctdb->ifaces, i);
130 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
135 for (n = 0; vnn->ifaces[n] != NULL; n++) {
136 if (strcmp(name, vnn->ifaces[n]) == 0) {
144 /* If any interfaces now have no possible IPs then delete them. This
145 * implementation is naive (i.e. simple) rather than clever
146 * (i.e. complex). Given that this is run on delip and that operation
147 * is rare, this doesn't need to be efficient - it needs to be
148 * foolproof. One alternative is reference counting, where the logic
149 * is distributed and can, therefore, be broken in multiple places.
150 * Another alternative is to build a red-black tree of interfaces that
151 * can have addresses (by walking ctdb->vnn once) and then walking
152 * ctdb->ifaces once and deleting those not in the tree. Let's go to
153 * one of those if the naive implementation causes problems... :-)
155 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 struct ctdb_interface *i, *next;
160 /* For each interface, check if there's an IP using it. */
161 for (i = ctdb->ifaces; i != NULL; i = next) {
166 /* Only consider interfaces named in the given VNN. */
167 if (!vnn_has_interface_with_name(vnn, i->name)) {
171 /* Search for a vnn with this interface. */
173 for (tv=ctdb->vnn; tv; tv=tv->next) {
174 if (vnn_has_interface_with_name(tv, i->name)) {
181 /* None of the VNNs are using this interface. */
182 DLIST_REMOVE(ctdb->ifaces, i);
189 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
192 struct ctdb_interface *i;
194 for (i=ctdb->ifaces;i;i=i->next) {
195 if (strcmp(i->name, iface) == 0) {
203 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
204 struct ctdb_vnn *vnn)
207 struct ctdb_interface *cur = NULL;
208 struct ctdb_interface *best = NULL;
210 for (i=0; vnn->ifaces[i]; i++) {
212 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
226 if (cur->references < best->references) {
235 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
236 struct ctdb_vnn *vnn)
238 struct ctdb_interface *best = NULL;
241 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
242 "still assigned to iface '%s'\n",
243 ctdb_addr_to_str(&vnn->public_address),
244 ctdb_vnn_iface_string(vnn)));
248 best = ctdb_vnn_best_iface(ctdb, vnn);
250 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
251 "cannot assign to iface any iface\n",
252 ctdb_addr_to_str(&vnn->public_address)));
258 vnn->pnn = ctdb->pnn;
260 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
261 "now assigned to iface '%s' refs[%d]\n",
262 ctdb_addr_to_str(&vnn->public_address),
263 ctdb_vnn_iface_string(vnn),
268 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
269 struct ctdb_vnn *vnn)
271 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
272 "now unassigned (old iface '%s' refs[%d])\n",
273 ctdb_addr_to_str(&vnn->public_address),
274 ctdb_vnn_iface_string(vnn),
275 vnn->iface?vnn->iface->references:0));
277 vnn->iface->references--;
280 if (vnn->pnn == ctdb->pnn) {
285 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
286 struct ctdb_vnn *vnn)
290 /* Nodes that are not RUNNING can not host IPs */
291 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
295 if (vnn->delete_pending) {
299 if (vnn->iface && vnn->iface->link_up) {
303 for (i=0; vnn->ifaces[i]; i++) {
304 struct ctdb_interface *cur;
306 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
319 struct ctdb_takeover_arp {
320 struct ctdb_context *ctdb;
323 struct ctdb_tcp_array *tcparray;
324 struct ctdb_vnn *vnn;
329 lists of tcp endpoints
331 struct ctdb_tcp_list {
332 struct ctdb_tcp_list *prev, *next;
333 struct ctdb_connection connection;
337 list of clients to kill on IP release
339 struct ctdb_client_ip {
340 struct ctdb_client_ip *prev, *next;
341 struct ctdb_context *ctdb;
348 send a gratuitous arp
350 static void ctdb_control_send_arp(struct tevent_context *ev,
351 struct tevent_timer *te,
352 struct timeval t, void *private_data)
354 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
355 struct ctdb_takeover_arp);
357 struct ctdb_tcp_array *tcparray;
358 const char *iface = ctdb_vnn_iface_string(arp->vnn);
360 ret = ctdb_sys_send_arp(&arp->addr, iface);
362 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
363 iface, strerror(errno)));
366 tcparray = arp->tcparray;
368 for (i=0;i<tcparray->num;i++) {
369 struct ctdb_connection *tcon;
371 tcon = &tcparray->connections[i];
372 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
373 (unsigned)ntohs(tcon->dst.ip.sin_port),
374 ctdb_addr_to_str(&tcon->src),
375 (unsigned)ntohs(tcon->src.ip.sin_port)));
376 ret = ctdb_sys_send_tcp(
381 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
382 ctdb_addr_to_str(&tcon->src)));
389 if (arp->count == CTDB_ARP_REPEAT) {
394 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
395 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
396 ctdb_control_send_arp, arp);
399 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
400 struct ctdb_vnn *vnn)
402 struct ctdb_takeover_arp *arp;
403 struct ctdb_tcp_array *tcparray;
405 if (!vnn->takeover_ctx) {
406 vnn->takeover_ctx = talloc_new(vnn);
407 if (!vnn->takeover_ctx) {
412 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
418 arp->addr = vnn->public_address;
421 tcparray = vnn->tcp_array;
423 /* add all of the known tcp connections for this IP to the
424 list of tcp connections to send tickle acks for */
425 arp->tcparray = talloc_steal(arp, tcparray);
427 vnn->tcp_array = NULL;
428 vnn->tcp_update_needed = true;
431 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
432 timeval_zero(), ctdb_control_send_arp, arp);
437 struct ctdb_do_takeip_state {
438 struct ctdb_req_control_old *c;
439 struct ctdb_vnn *vnn;
443 called when takeip event finishes
445 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
448 struct ctdb_do_takeip_state *state =
449 talloc_get_type(private_data, struct ctdb_do_takeip_state);
454 if (status == -ETIME) {
457 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
458 ctdb_addr_to_str(&state->vnn->public_address),
459 ctdb_vnn_iface_string(state->vnn)));
460 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
466 if (ctdb->do_checkpublicip) {
468 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
470 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
477 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
478 data.dsize = strlen((char *)data.dptr) + 1;
479 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
481 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
484 /* the control succeeded */
485 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
490 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
492 state->vnn->update_in_flight = false;
497 take over an ip address
499 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
500 struct ctdb_req_control_old *c,
501 struct ctdb_vnn *vnn)
504 struct ctdb_do_takeip_state *state;
506 if (vnn->update_in_flight) {
507 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
508 "update for this IP already in flight\n",
509 ctdb_addr_to_str(&vnn->public_address),
510 vnn->public_netmask_bits));
514 ret = ctdb_vnn_assign_iface(ctdb, vnn);
516 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
517 "assign a usable interface\n",
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits));
523 state = talloc(vnn, struct ctdb_do_takeip_state);
524 CTDB_NO_MEMORY(ctdb, state);
526 state->c = talloc_steal(ctdb, c);
529 vnn->update_in_flight = true;
530 talloc_set_destructor(state, ctdb_takeip_destructor);
532 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
533 ctdb_addr_to_str(&vnn->public_address),
534 vnn->public_netmask_bits,
535 ctdb_vnn_iface_string(vnn)));
537 ret = ctdb_event_script_callback(ctdb,
539 ctdb_do_takeip_callback,
543 ctdb_vnn_iface_string(vnn),
544 ctdb_addr_to_str(&vnn->public_address),
545 vnn->public_netmask_bits);
548 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
549 ctdb_addr_to_str(&vnn->public_address),
550 ctdb_vnn_iface_string(vnn)));
558 struct ctdb_do_updateip_state {
559 struct ctdb_req_control_old *c;
560 struct ctdb_interface *old;
561 struct ctdb_vnn *vnn;
565 called when updateip event finishes
567 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
570 struct ctdb_do_updateip_state *state =
571 talloc_get_type(private_data, struct ctdb_do_updateip_state);
575 if (status == -ETIME) {
579 ("Failed update of IP %s from interface %s to %s\n",
580 ctdb_addr_to_str(&state->vnn->public_address),
581 iface_string(state->old),
582 ctdb_vnn_iface_string(state->vnn)));
585 * All we can do is reset the old interface
586 * and let the next run fix it
588 ctdb_vnn_unassign_iface(ctdb, state->vnn);
589 state->vnn->iface = state->old;
590 state->vnn->iface->references++;
592 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
597 if (ctdb->do_checkpublicip) {
599 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
601 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
608 /* the control succeeded */
609 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
614 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
616 state->vnn->update_in_flight = false;
621 update (move) an ip address
623 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
624 struct ctdb_req_control_old *c,
625 struct ctdb_vnn *vnn)
628 struct ctdb_do_updateip_state *state;
629 struct ctdb_interface *old = vnn->iface;
630 const char *old_name = iface_string(old);
631 const char *new_name;
633 if (vnn->update_in_flight) {
634 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
635 "update for this IP already in flight\n",
636 ctdb_addr_to_str(&vnn->public_address),
637 vnn->public_netmask_bits));
641 ctdb_vnn_unassign_iface(ctdb, vnn);
642 ret = ctdb_vnn_assign_iface(ctdb, vnn);
644 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
645 "assign a usable interface (old iface '%s')\n",
646 ctdb_addr_to_str(&vnn->public_address),
647 vnn->public_netmask_bits,
652 new_name = ctdb_vnn_iface_string(vnn);
653 if (old_name != NULL && new_name != NULL &&
654 strcmp(old_name, new_name) == 0) {
655 /* A benign update from one interface onto itself.
656 * no need to run the eventscripts in this case, just return
659 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
663 state = talloc(vnn, struct ctdb_do_updateip_state);
664 CTDB_NO_MEMORY(ctdb, state);
666 state->c = talloc_steal(ctdb, c);
670 vnn->update_in_flight = true;
671 talloc_set_destructor(state, ctdb_updateip_destructor);
673 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
674 "interface %s to %s\n",
675 ctdb_addr_to_str(&vnn->public_address),
676 vnn->public_netmask_bits,
680 ret = ctdb_event_script_callback(ctdb,
682 ctdb_do_updateip_callback,
684 CTDB_EVENT_UPDATE_IP,
688 ctdb_addr_to_str(&vnn->public_address),
689 vnn->public_netmask_bits);
692 ("Failed update IP %s from interface %s to %s\n",
693 ctdb_addr_to_str(&vnn->public_address),
694 old_name, new_name));
703 Find the vnn of the node that has a public ip address
704 returns NULL if the address is not known as a public address
706 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
708 struct ctdb_vnn *vnn;
710 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
711 if (ctdb_same_ip(&vnn->public_address, addr)) {
720 take over an ip address
722 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
723 struct ctdb_req_control_old *c,
728 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
729 struct ctdb_vnn *vnn;
730 bool have_ip = false;
731 bool do_updateip = false;
732 bool do_takeip = false;
733 struct ctdb_interface *best_iface = NULL;
735 if (pip->pnn != ctdb->pnn) {
736 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
737 "with pnn %d, but we're node %d\n",
738 ctdb_addr_to_str(&pip->addr),
739 pip->pnn, ctdb->pnn));
743 /* update our vnn list */
744 vnn = find_public_ip_vnn(ctdb, &pip->addr);
746 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
747 ctdb_addr_to_str(&pip->addr)));
751 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
752 have_ip = ctdb_sys_have_ip(&pip->addr);
754 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
755 if (best_iface == NULL) {
756 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
757 "a usable interface (old %s, have_ip %d)\n",
758 ctdb_addr_to_str(&vnn->public_address),
759 vnn->public_netmask_bits,
760 ctdb_vnn_iface_string(vnn),
765 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
766 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
767 "and we have it on iface[%s], but it was assigned to node %d"
768 "and we are node %d, banning ourself\n",
769 ctdb_addr_to_str(&vnn->public_address),
770 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
775 if (vnn->pnn == -1 && have_ip) {
776 /* This will cause connections to be reset and
777 * reestablished. However, this is a very unusual
778 * situation and doing this will completely repair the
779 * inconsistency in the VNN.
783 " Doing updateip for IP %s already on an interface\n",
784 ctdb_addr_to_str(&vnn->public_address)));
789 if (vnn->iface != best_iface) {
790 if (!vnn->iface->link_up) {
792 } else if (vnn->iface->references > (best_iface->references + 1)) {
793 /* only move when the rebalance gains something */
801 ctdb_vnn_unassign_iface(ctdb, vnn);
808 ret = ctdb_do_takeip(ctdb, c, vnn);
812 } else if (do_updateip) {
813 ret = ctdb_do_updateip(ctdb, c, vnn);
819 * The interface is up and the kernel knows the ip
822 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
823 ctdb_addr_to_str(&pip->addr),
824 vnn->public_netmask_bits,
825 ctdb_vnn_iface_string(vnn)));
829 /* tell ctdb_control.c that we will be replying asynchronously */
835 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
837 DLIST_REMOVE(ctdb->vnn, vnn);
838 ctdb_vnn_unassign_iface(ctdb, vnn);
839 ctdb_remove_orphaned_ifaces(ctdb, vnn);
843 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
844 struct ctdb_vnn *vnn,
845 ctdb_sock_addr *addr)
849 /* Send a message to all clients of this node telling them
850 * that the cluster has been reconfigured and they should
851 * close any connections on this IP address
853 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
854 data.dsize = strlen((char *)data.dptr)+1;
855 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
856 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
858 ctdb_vnn_unassign_iface(ctdb, vnn);
860 /* Process the IP if it has been marked for deletion */
861 if (vnn->delete_pending) {
862 do_delete_ip(ctdb, vnn);
869 struct release_ip_callback_state {
870 struct ctdb_req_control_old *c;
871 ctdb_sock_addr *addr;
872 struct ctdb_vnn *vnn;
876 called when releaseip event finishes
878 static void release_ip_callback(struct ctdb_context *ctdb, int status,
881 struct release_ip_callback_state *state =
882 talloc_get_type(private_data, struct release_ip_callback_state);
884 if (status == -ETIME) {
888 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
889 if (ctdb_sys_have_ip(state->addr)) {
891 ("IP %s still hosted during release IP callback, failing\n",
892 ctdb_addr_to_str(state->addr)));
893 ctdb_request_control_reply(ctdb, state->c,
900 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
902 /* the control succeeded */
903 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
907 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
909 if (state->vnn != NULL) {
910 state->vnn->update_in_flight = false;
916 release an ip address
918 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
919 struct ctdb_req_control_old *c,
924 struct release_ip_callback_state *state;
925 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
926 struct ctdb_vnn *vnn;
929 /* update our vnn list */
930 vnn = find_public_ip_vnn(ctdb, &pip->addr);
932 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
933 ctdb_addr_to_str(&pip->addr)));
938 /* stop any previous arps */
939 talloc_free(vnn->takeover_ctx);
940 vnn->takeover_ctx = NULL;
942 /* Some ctdb tool commands (e.g. moveip) send
943 * lazy multicast to drop an IP from any node that isn't the
944 * intended new node. The following makes ctdbd ignore
945 * a release for any address it doesn't host.
947 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
948 if (!ctdb_sys_have_ip(&pip->addr)) {
949 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
950 ctdb_addr_to_str(&pip->addr),
951 vnn->public_netmask_bits,
952 ctdb_vnn_iface_string(vnn)));
953 ctdb_vnn_unassign_iface(ctdb, vnn);
957 if (vnn->iface == NULL) {
958 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
959 ctdb_addr_to_str(&pip->addr),
960 vnn->public_netmask_bits));
965 /* There is a potential race between take_ip and us because we
966 * update the VNN via a callback that runs when the
967 * eventscripts have been run. Avoid the race by allowing one
968 * update to be in flight at a time.
970 if (vnn->update_in_flight) {
971 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
972 "update for this IP already in flight\n",
973 ctdb_addr_to_str(&vnn->public_address),
974 vnn->public_netmask_bits));
978 iface = strdup(ctdb_vnn_iface_string(vnn));
980 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
981 ctdb_addr_to_str(&pip->addr),
982 vnn->public_netmask_bits,
986 state = talloc(ctdb, struct release_ip_callback_state);
988 ctdb_set_error(ctdb, "Out of memory at %s:%d",
994 state->c = talloc_steal(state, c);
995 state->addr = talloc(state, ctdb_sock_addr);
996 if (state->addr == NULL) {
997 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1003 *state->addr = pip->addr;
1006 vnn->update_in_flight = true;
1007 talloc_set_destructor(state, ctdb_releaseip_destructor);
1009 ret = ctdb_event_script_callback(ctdb,
1010 state, release_ip_callback, state,
1011 CTDB_EVENT_RELEASE_IP,
1014 ctdb_addr_to_str(&pip->addr),
1015 vnn->public_netmask_bits);
1018 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1019 ctdb_addr_to_str(&pip->addr),
1020 ctdb_vnn_iface_string(vnn)));
1025 /* tell the control that we will be replying asynchronously */
1026 *async_reply = true;
1030 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1031 ctdb_sock_addr *addr,
1032 unsigned mask, const char *ifaces,
1035 struct ctdb_vnn *vnn;
1042 tmp = strdup(ifaces);
1043 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1044 if (!ctdb_sys_check_iface_exists(iface)) {
1045 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1052 /* Verify that we don't have an entry for this ip yet */
1053 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1054 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1055 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1056 ctdb_addr_to_str(addr)));
1061 /* create a new vnn structure for this ip address */
1062 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1063 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1064 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1065 tmp = talloc_strdup(vnn, ifaces);
1066 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1067 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1068 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1069 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1070 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1071 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1075 vnn->ifaces[num] = NULL;
1076 vnn->public_address = *addr;
1077 vnn->public_netmask_bits = mask;
1080 for (i=0; vnn->ifaces[i]; i++) {
1081 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1083 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1084 "for public_address[%s]\n",
1085 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1091 DLIST_ADD(ctdb->vnn, vnn);
1097 setup the public address lists from a file
1099 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1105 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1106 if (lines == NULL) {
1107 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1110 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1114 for (i=0;i<nlines;i++) {
1116 ctdb_sock_addr addr;
1117 const char *addrstr;
1122 while ((*line == ' ') || (*line == '\t')) {
1128 if (strcmp(line, "") == 0) {
1131 tok = strtok(line, " \t");
1133 tok = strtok(NULL, " \t");
1135 if (NULL == ctdb->default_public_interface) {
1136 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1141 ifaces = ctdb->default_public_interface;
1146 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1147 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1151 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1152 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1163 static struct ctdb_public_ip_list *
1164 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1165 TALLOC_CTX *mem_ctx,
1166 struct ctdb_node_map_old *nodemap,
1167 uint32_t public_ip_flags)
1170 struct ctdb_public_ip_list_old *ip_list;
1171 struct ctdb_public_ip_list *public_ips;
1173 public_ips = talloc_zero_array(mem_ctx,
1174 struct ctdb_public_ip_list,
1176 if (public_ips == NULL) {
1177 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1181 for (j = 0; j < nodemap->num; j++) {
1182 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1186 /* Retrieve the list of public IPs from the
1187 * node. Flags says whether it is known or
1189 ret = ctdb_ctrl_get_public_ips_flags(
1190 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1191 public_ip_flags, &ip_list);
1194 ("Failed to read public IPs from node: %u\n", j));
1195 talloc_free(public_ips);
1198 public_ips[j].num = ip_list->num;
1199 if (ip_list->num == 0) {
1200 talloc_free(ip_list);
1203 public_ips[j].ip = talloc_zero_array(public_ips,
1204 struct ctdb_public_ip,
1206 if (public_ips[j].ip == NULL) {
1207 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1208 talloc_free(public_ips);
1211 memcpy(public_ips[j].ip, &ip_list->ips[0],
1212 sizeof(struct ctdb_public_ip) * ip_list->num);
1213 talloc_free(ip_list);
1219 struct get_tunable_callback_data {
1220 const char *tunable;
1225 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1226 int32_t res, TDB_DATA outdata,
1229 struct get_tunable_callback_data *cd =
1230 (struct get_tunable_callback_data *)callback;
1234 /* Already handled in fail callback */
1238 if (outdata.dsize != sizeof(uint32_t)) {
1239 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1240 cd->tunable, pnn, (int)sizeof(uint32_t),
1241 (int)outdata.dsize));
1246 size = talloc_array_length(cd->out);
1248 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1249 cd->tunable, pnn, size));
1254 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1257 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1258 int32_t res, TDB_DATA outdata,
1261 struct get_tunable_callback_data *cd =
1262 (struct get_tunable_callback_data *)callback;
1267 ("Timed out getting tunable \"%s\" from node %d\n",
1273 DEBUG(DEBUG_WARNING,
1274 ("Tunable \"%s\" not implemented on node %d\n",
1279 ("Unexpected error getting tunable \"%s\" from node %d\n",
1285 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1286 TALLOC_CTX *tmp_ctx,
1287 struct ctdb_node_map_old *nodemap,
1288 const char *tunable,
1289 uint32_t default_value)
1292 struct ctdb_control_get_tunable *t;
1295 struct get_tunable_callback_data callback_data;
1298 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1299 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1300 for (i=0; i<nodemap->num; i++) {
1301 tvals[i] = default_value;
1304 callback_data.out = tvals;
1305 callback_data.tunable = tunable;
1306 callback_data.fatal = false;
1308 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1309 data.dptr = talloc_size(tmp_ctx, data.dsize);
1310 t = (struct ctdb_control_get_tunable *)data.dptr;
1311 t->length = strlen(tunable)+1;
1312 memcpy(t->name, tunable, t->length);
1313 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1314 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1315 nodes, 0, TAKEOVER_TIMEOUT(),
1317 get_tunable_callback,
1318 get_tunable_fail_callback,
1319 &callback_data) != 0) {
1320 if (callback_data.fatal) {
1326 talloc_free(data.dptr);
1331 static struct ctdb_node_map *
1332 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1333 const struct ctdb_node_map_old *old)
1335 struct ctdb_node_map *new;
1337 new = talloc(mem_ctx, struct ctdb_node_map);
1339 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1342 new->num = old->num;
1343 new->node = talloc_zero_array(new,
1344 struct ctdb_node_and_flags, new->num);
1345 memcpy(new->node, &old->nodes[0],
1346 sizeof(struct ctdb_node_and_flags) * new->num);
1352 static bool set_ipflags(struct ctdb_context *ctdb,
1353 struct ipalloc_state *ipalloc_state,
1354 struct ctdb_node_map_old *nodemap)
1356 uint32_t *tval_noiptakeover;
1357 uint32_t *tval_noiphostonalldisabled;
1358 struct ctdb_node_map *new;
1360 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1362 if (tval_noiptakeover == NULL) {
1366 tval_noiphostonalldisabled =
1367 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1368 "NoIPHostOnAllDisabled", 0);
1369 if (tval_noiphostonalldisabled == NULL) {
1370 /* Caller frees tmp_ctx */
1374 new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1379 ipalloc_set_node_flags(ipalloc_state, new,
1381 tval_noiphostonalldisabled);
1383 talloc_free(tval_noiptakeover);
1384 talloc_free(tval_noiphostonalldisabled);
1390 static enum ipalloc_algorithm
1391 determine_algorithm(const struct ctdb_tunable_list *tunables)
1393 if (1 == tunables->lcp2_public_ip_assignment) {
1394 return IPALLOC_LCP2;
1395 } else if (1 == tunables->deterministic_public_ips) {
1396 return IPALLOC_DETERMINISTIC;
1398 return IPALLOC_NONDETERMINISTIC;
1402 struct takeover_callback_data {
1404 unsigned int *fail_count;
1407 static struct takeover_callback_data *
1408 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1411 static struct takeover_callback_data *takeover_data;
1413 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1414 if (takeover_data == NULL) {
1415 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1419 takeover_data->fail_count = talloc_zero_array(takeover_data,
1420 unsigned int, num_nodes);
1421 if (takeover_data->fail_count == NULL) {
1422 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1423 talloc_free(takeover_data);
1427 takeover_data->num_nodes = num_nodes;
1429 return takeover_data;
1432 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1433 uint32_t node_pnn, int32_t res,
1434 TDB_DATA outdata, void *callback_data)
1436 struct takeover_callback_data *cd =
1437 talloc_get_type_abort(callback_data,
1438 struct takeover_callback_data);
1440 if (node_pnn >= cd->num_nodes) {
1441 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1445 if (cd->fail_count[node_pnn] == 0) {
1447 ("Node %u failed the takeover run\n", node_pnn));
1450 cd->fail_count[node_pnn]++;
1453 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1454 struct takeover_callback_data *tcd)
1456 unsigned int max_fails = 0;
1457 uint32_t max_pnn = -1;
1460 for (i = 0; i < tcd->num_nodes; i++) {
1461 if (tcd->fail_count[i] > max_fails) {
1463 max_fails = tcd->fail_count[i];
1467 if (max_fails > 0) {
1472 ("Sending banning credits to %u with fail count %u\n",
1473 max_pnn, max_fails));
1475 data.dptr = (uint8_t *)&max_pnn;
1476 data.dsize = sizeof(uint32_t);
1477 ret = ctdb_client_send_message(ctdb,
1478 CTDB_BROADCAST_CONNECTED,
1483 ("Failed to set banning credits for node %u\n",
1490 * Recalculate the allocation of public IPs to nodes and have the
1491 * nodes host their allocated addresses.
1493 * - Initialise IP allocation state. Pass:
1494 + algorithm to be used;
1495 + whether IP rebalancing ("failback") should be done (this uses a
1496 cluster-wide configuration variable and only the value from the
1497 master node is used); and
1498 * + list of nodes to force rebalance (internal structure, currently
1499 * no way to fetch, only used by LCP2 for nodes that have had new
1500 * IP addresses added).
1501 * - Set IP flags for IP allocation based on node map and tunables
1502 * NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
1503 * (tunable fetching done separately so values can be faked in unit
1505 * - Retrieve known and available IP addresses (done separately so
1506 * values can be faked in unit testing)
1507 * - Use ipalloc_set_public_ips() to set known and available IP
1508 addresses for allocation
1509 * - If cluster can't host IP addresses then early exit
1510 * - Run IP allocation algorithm
1511 * - Send RELEASE_IP to all nodes for IPs they should not host
1512 * - Send TAKE_IP to all nodes for IPs they should host
1513 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Entry point for a takeover run: work out which node should host each
 * public IP and push the result out to the cluster.
 *
 * ctdb                  - daemon context (tunables, node count)
 * nodemap               - node map used to decide which nodes to contact
 * force_rebalance_nodes - passed through to ipalloc_state_init()
 *
 * Temporary state is allocated on tmp_ctx, which is freed on the error
 * paths and at the end of the function.  Failures reported by the async
 * controls are counted through takeover_run_fail_callback() and handed
 * to takeover_run_process_failures().  The return statements are on
 * lines not shown here.
 */
1515 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1516 uint32_t *force_rebalance_nodes)
1519 struct ctdb_public_ip ip;
1521 struct public_ip_list *all_ips, *tmp_ip;
1523 struct timeval timeout;
1524 struct client_async_data *async_data;
1525 struct ctdb_client_control_state *state;
1526 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1527 struct ipalloc_state *ipalloc_state;
1528 struct ctdb_public_ip_list *known_ips, *available_ips;
1529 struct takeover_callback_data *takeover_data;
1531 /* Initialise fail callback data to be used with
1532 * takeover_run_fail_callback(). A failure in any of the
1533 * following steps will cause an early return, so this can be
1534 * reused for each of those steps without re-initialising. */
1535 takeover_data = takeover_callback_data_init(tmp_ctx,
1537 if (takeover_data == NULL) {
1538 talloc_free(tmp_ctx);
1542 /* Default timeout for early jump to IPREALLOCATED. See below
1543 * for explanation of 3 times... */
1544 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1547 * ip failover is completely disabled, just send out the
1548 * ipreallocated event.
1550 if (ctdb->tunable.disable_ip_failover != 0) {
/* Build the allocator state from the node count and the tunables. */
1554 ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1555 determine_algorithm(&ctdb->tunable),
1556 (ctdb->tunable.no_ip_failback != 0),
1557 force_rebalance_nodes);
1558 if (ipalloc_state == NULL) {
1559 talloc_free(tmp_ctx);
1563 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1565 ("Failed to set IP flags - aborting takeover run\n"));
1566 talloc_free(tmp_ctx);
1570 /* Fetch known/available public IPs from each active node */
1571 /* Fetch lists of known public IPs from all nodes */
1572 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1574 if (known_ips == NULL) {
1575 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1576 talloc_free(tmp_ctx);
1579 available_ips = ctdb_fetch_remote_public_ips(
1580 ctdb, ipalloc_state, nodemap,
1581 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1582 if (available_ips == NULL) {
1583 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1584 talloc_free(tmp_ctx);
1588 if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1589 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1590 talloc_free(tmp_ctx);
1594 if (! ipalloc_can_host_ips(ipalloc_state)) {
1595 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1599 /* Do the IP reassignment calculations */
1600 all_ips = ipalloc(ipalloc_state);
1601 if (all_ips == NULL) {
1602 talloc_free(tmp_ctx);
1606 /* Now tell all nodes to release any public IPs they should not
1607 * host. This will be a NOOP on nodes that don't currently
1608 * hold the given IP.
1610 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1611 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1613 async_data->fail_callback = takeover_run_fail_callback;
1614 async_data->callback_data = takeover_data;
1616 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1618 /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
1619 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1620 * seconds. However, RELEASE_IP can take longer due to TCP
1621 * connection killing, so sometimes needs more time.
1622 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1623 * seconds across all 3 stages. No explicit expiry checks are
1624 * needed before each stage because tevent is smart enough to
1625 * fire the timeouts even if they are in the past. Initialise
1626 * this here so it explicitly covers the stages we're
1627 * interested in but, in particular, not the time taken by the
1630 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1632 /* Send a RELEASE_IP to all nodes that should not be hosting
1633 * each IP. For each IP, all but one of these will be
1634 * redundant. However, the redundant ones are used to tell
1635 * nodes which node should be hosting the IP so that commands
1636 * like "ctdb ip" can display a particular nodes idea of who
1637 * is hosting what. */
1638 for (i=0;i<nodemap->num;i++) {
1639 /* don't talk to unconnected nodes, but do talk to banned nodes */
1640 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1644 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1645 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1646 /* This node should be serving this
1647 vnn so don't tell it to release the ip
1651 ip.pnn = tmp_ip->pnn;
1652 ip.addr = tmp_ip->addr;
1654 data.dsize = sizeof(ip);
1655 data.dptr = (uint8_t *)&ip;
1656 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1657 0, CTDB_CONTROL_RELEASE_IP, 0,
1660 if (state == NULL) {
1661 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1662 talloc_free(tmp_ctx);
1666 ctdb_client_async_add(async_data, state);
1669 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1671 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1674 talloc_free(async_data);
1677 /* For each IP, send a TAKEOVER_IP to the node that should be
1678 * hosting it. Many of these will often be redundant (since
1679 * the allocation won't have changed) but they can be useful
1680 * to recover from inconsistencies. */
/* Fresh async state for this stage; failures are still counted in
 * the same takeover_data. */
1681 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1682 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1684 async_data->fail_callback = takeover_run_fail_callback;
1685 async_data->callback_data = takeover_data;
1687 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1688 if (tmp_ip->pnn == -1) {
1689 /* this IP won't be taken over */
1693 ip.pnn = tmp_ip->pnn;
1694 ip.addr = tmp_ip->addr;
1696 data.dsize = sizeof(ip);
1697 data.dptr = (uint8_t *)&ip;
1698 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1699 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1700 data, async_data, &timeout, NULL);
1701 if (state == NULL) {
1702 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1703 talloc_free(tmp_ctx);
1707 ctdb_client_async_add(async_data, state);
1709 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1711 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1717 * Tell all nodes to run eventscripts to process the
1718 * "ipreallocated" event. This can do a lot of things,
1719 * including restarting services to reconfigure them if public
1720 * IPs have moved. Once upon a time this event only used to
1723 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1724 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1727 NULL, takeover_run_fail_callback,
1731 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1735 talloc_free(tmp_ctx);
1739 takeover_run_process_failures(ctdb, takeover_data);
1740 talloc_free(tmp_ctx);
1746 destroy a ctdb_client_ip structure
/*
 * talloc destructor for struct ctdb_client_ip: logs the client address
 * and port at debug level and unlinks the entry from
 * ctdb->client_ip_list.
 */
1748 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1750 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1751 ctdb_addr_to_str(&ip->addr),
1752 ntohs(ip->addr.ip.sin_port),
1755 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1760 called by a client to inform us of a TCP connection that it is managing
1761 that should be tickled with an ACK when IP takeover is done
/*
 * Control handler: a local client registers a TCP connection that
 * should be tickled after an IP takeover.
 *
 * indata.dptr is interpreted as a struct ctdb_connection.  Both
 * endpoints are canonicalised in place, the destination is looked up as
 * a public address, and the registration is failed if this node does
 * not currently hold that address.  On success a ctdb_client_ip entry
 * and a ctdb_tcp_list entry are allocated as talloc children of the
 * client, and the connection is broadcast to all connected nodes as
 * CTDB_CONTROL_TCP_ADD with no reply requested.
 *
 * NOTE(review): the result of reqid_find() is dereferenced
 * (client->pid, client->tcp_list) without a NULL check in the lines
 * visible here - confirm a missing client cannot reach this point.
 */
1763 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1766 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1767 struct ctdb_connection *tcp_sock = NULL;
1768 struct ctdb_tcp_list *tcp;
1769 struct ctdb_connection t;
1772 struct ctdb_client_ip *ip;
1773 struct ctdb_vnn *vnn;
1774 ctdb_sock_addr addr;
1776 /* If we don't have public IPs, tickles are useless */
1777 if (ctdb->vnn == NULL) {
1781 tcp_sock = (struct ctdb_connection *)indata.dptr;
1783 addr = tcp_sock->src;
1784 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1785 addr = tcp_sock->dst;
1786 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1789 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1790 vnn = find_public_ip_vnn(ctdb, &addr);
1792 switch (addr.sa.sa_family) {
1794 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1795 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1796 ctdb_addr_to_str(&addr)));
1800 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1801 ctdb_addr_to_str(&addr)));
1804 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1810 if (vnn->pnn != ctdb->pnn) {
1811 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1812 ctdb_addr_to_str(&addr),
1813 client_id, client->pid));
1814 /* failing this call will tell smbd to die */
1818 ip = talloc(client, struct ctdb_client_ip);
1819 CTDB_NO_MEMORY(ctdb, ip);
1823 ip->client_id = client_id;
1824 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1825 DLIST_ADD(ctdb->client_ip_list, ip);
1827 tcp = talloc(client, struct ctdb_tcp_list);
1828 CTDB_NO_MEMORY(ctdb, tcp);
1830 tcp->connection.src = tcp_sock->src;
1831 tcp->connection.dst = tcp_sock->dst;
1833 DLIST_ADD(client->tcp_list, tcp);
1835 t.src = tcp_sock->src;
1836 t.dst = tcp_sock->dst;
1838 data.dptr = (uint8_t *)&t;
1839 data.dsize = sizeof(t);
1841 switch (addr.sa.sa_family) {
1843 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1844 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1845 ctdb_addr_to_str(&tcp_sock->src),
1846 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1849 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1850 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1851 ctdb_addr_to_str(&tcp_sock->src),
1852 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1855 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1859 /* tell all nodes about this tcp connection */
1860 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1861 CTDB_CONTROL_TCP_ADD,
1862 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1864 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1872 find a tcp address on a list
/*
 * Look up a connection in a tickle array.
 *
 * Returns a pointer to the first entry of array whose source and
 * destination both match tcp according to ctdb_same_sockaddr().  A NULL
 * array is handled explicitly before the scan.  The not-found return is
 * on lines not shown here (callers compare the result against NULL).
 */
1874 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1875 struct ctdb_connection *tcp)
1879 if (array == NULL) {
1883 for (i=0;i<array->num;i++) {
1884 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1885 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1886 return &array->connections[i];
1895 called by a daemon to inform us of a TCP connection that one of its
1896 clients is managing and that should be tickled with an ACK when IP takeover is
/*
 * Control handler: record a TCP connection against the public address
 * it terminates on, so that it can be tickled after a takeover.
 *
 * indata.dptr is interpreted as a struct ctdb_connection.  If the
 * destination is not a known public address, that is logged.  The first
 * connection for an address creates the vnn's tcp_array; later ones are
 * appended with talloc_realloc() unless ctdb_tcp_find() shows the
 * connection is already recorded.  When tcp_update_needed is true the
 * vnn's tcp_update_needed flag is set.
 *
 * NOTE(review): the talloc_realloc() result is stored straight into
 * tcparray->connections before it is checked, so an allocation failure
 * overwrites the existing pointer with NULL - confirm whether that
 * matters for later users of the array.
 */
1899 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1901 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1902 struct ctdb_tcp_array *tcparray;
1903 struct ctdb_connection tcp;
1904 struct ctdb_vnn *vnn;
1906 /* If we don't have public IPs, tickles are useless */
1907 if (ctdb->vnn == NULL) {
1911 vnn = find_public_ip_vnn(ctdb, &p->dst);
1913 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1914 ctdb_addr_to_str(&p->dst)));
1920 tcparray = vnn->tcp_array;
1922 /* If this is the first tickle */
1923 if (tcparray == NULL) {
1924 tcparray = talloc(vnn, struct ctdb_tcp_array);
1925 CTDB_NO_MEMORY(ctdb, tcparray);
1926 vnn->tcp_array = tcparray;
1929 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1930 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1932 tcparray->connections[tcparray->num].src = p->src;
1933 tcparray->connections[tcparray->num].dst = p->dst;
1936 if (tcp_update_needed) {
1937 vnn->tcp_update_needed = true;
1943 /* Do we already have this tickle ?*/
1946 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1947 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1948 ctdb_addr_to_str(&tcp.dst),
1949 ntohs(tcp.dst.ip.sin_port),
1954 /* A new tickle, we must add it to the array */
1955 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1956 struct ctdb_connection,
1958 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1960 tcparray->connections[tcparray->num].src = p->src;
1961 tcparray->connections[tcparray->num].dst = p->dst;
1964 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1965 ctdb_addr_to_str(&tcp.dst),
1966 ntohs(tcp.dst.ip.sin_port),
1969 if (tcp_update_needed) {
1970 vnn->tcp_update_needed = true;
/*
 * Remove one connection from a vnn's tickle array.
 *
 * Nothing is removed if the vnn has no array or the connection is not
 * found; both cases are logged.  Otherwise the last array entry is
 * copied over the matching slot and the count is decremented.  When the
 * count reaches zero the array itself is freed and the pointer cleared.
 * The vnn is then marked as needing a tickle update.
 */
1977 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1979 struct ctdb_connection *tcpp;
1985 /* if the array is empty we cant remove it
1986 and we don't need to do anything
1988 if (vnn->tcp_array == NULL) {
1989 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1990 ctdb_addr_to_str(&conn->dst),
1991 ntohs(conn->dst.ip.sin_port)));
1996 /* See if we know this connection
1997 if we don't know this connection then we dont need to do anything
1999 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2001 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2002 ctdb_addr_to_str(&conn->dst),
2003 ntohs(conn->dst.ip.sin_port)));
2008 /* We need to remove this entry from the array.
2009 Instead of allocating a new array and copying data to it
2010 we cheat and just copy the last entry in the existing array
2011 to the entry that is to be removed and just shring the
2014 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2015 vnn->tcp_array->num--;
2017 /* If we deleted the last entry we also need to remove the entire array
2019 if (vnn->tcp_array->num == 0) {
2020 talloc_free(vnn->tcp_array);
2021 vnn->tcp_array = NULL;
2024 vnn->tcp_update_needed = true;
2026 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2027 ctdb_addr_to_str(&conn->src),
2028 ntohs(conn->src.ip.sin_port)));
2033 called by a daemon to inform us of a TCP connection that one of its
2034 clients used is no longer needed in the tickle database
/*
 * Control handler: drop a TCP connection from the tickle data.
 *
 * indata.dptr is interpreted as a struct ctdb_connection.  Its
 * destination is looked up as a public address; if the lookup fails
 * that is logged, otherwise the connection is removed with
 * ctdb_remove_connection().
 */
2036 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2038 struct ctdb_vnn *vnn;
2039 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2041 /* If we don't have public IPs, tickles are useless */
2042 if (ctdb->vnn == NULL) {
2046 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2049 (__location__ " unable to find public address %s\n",
2050 ctdb_addr_to_str(&conn->dst)));
2054 ctdb_remove_connection(vnn, conn);
2061 Called when another daemon starts - causes all tickles for all
2062 public addresses we are serving to be sent to the new node on the
2063 next check. This actually causes the next scheduled call to
2064 ctdb_update_tcp_tickles() to update all nodes. This is simple and
2065 doesn't require careful error handling.
/*
 * Control handler for a startup notification from node pnn: logs the
 * sender and sets tcp_update_needed on every vnn in ctdb->vnn.
 */
2067 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2069 struct ctdb_vnn *vnn;
2071 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2072 (unsigned long) pnn));
2074 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2075 vnn->tcp_update_needed = true;
2083 called when a client structure goes away - hook to remove
2084 elements from the tcp_list in all daemons
/*
 * Drain client->tcp_list when a client structure goes away.
 *
 * Each entry is unlinked from the list and its destination is looked up
 * as a public address; a failed lookup is logged.  The connection is
 * removed from the tickle array only when this node is the one hosting
 * that address.
 */
2086 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2088 while (client->tcp_list) {
2089 struct ctdb_vnn *vnn;
2090 struct ctdb_tcp_list *tcp = client->tcp_list;
2091 struct ctdb_connection *conn = &tcp->connection;
2093 DLIST_REMOVE(client->tcp_list, tcp);
2095 vnn = find_public_ip_vnn(client->ctdb,
2099 (__location__ " unable to find public address %s\n",
2100 ctdb_addr_to_str(&conn->dst)));
2104 /* If the IP address is hosted on this node then
2105 * remove the connection. */
2106 if (vnn->pnn == client->ctdb->pnn) {
2107 ctdb_remove_connection(vnn, conn);
2110 /* Otherwise this function has been called because the
2111 * server IP address has been released to another node
2112 * and the client has exited. This means that we
2113 * should not delete the connection information. The
2114 * takeover node processes connections too. */
/*
 * Release the public IPs this node currently has configured.
 *
 * The disable_ip_failover tunable is checked first (the action taken is
 * on a line not shown here).  The vnn list is then walked.  Addresses
 * that ctdb_sys_have_ip() reports as not present only have their
 * interface unassigned.  For the others the CTDB_EVENT_RELEASE_IP event
 * is run through ctdb_event_script_args() with the interface, address
 * and netmask bits, guarded by update_in_flight so that a vnn with an
 * update already in progress is logged and left alone.  The address is
 * re-checked with ctdb_sys_have_ip() afterwards to detect a failed
 * release before release_ip_post() is called.  The number of released
 * IPs is logged at the end.
 */
2119 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2121 struct ctdb_vnn *vnn, *next;
2124 if (ctdb->tunable.disable_ip_failover == 1) {
2128 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
2129 /* vnn can be freed below in release_ip_post() */
2132 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2133 ctdb_vnn_unassign_iface(ctdb, vnn);
2137 /* Don't allow multiple releases at once. Some code,
2138 * particularly ctdb_tickle_sentenced_connections() is
2140 if (vnn->update_in_flight) {
2141 DEBUG(DEBUG_WARNING,
2143 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2144 ctdb_addr_to_str(&vnn->public_address),
2145 vnn->public_netmask_bits,
2146 ctdb_vnn_iface_string(vnn)));
2149 vnn->update_in_flight = true;
2151 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2152 ctdb_addr_to_str(&vnn->public_address),
2153 vnn->public_netmask_bits,
2154 ctdb_vnn_iface_string(vnn)));
2156 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2157 ctdb_vnn_iface_string(vnn),
2158 ctdb_addr_to_str(&vnn->public_address),
2159 vnn->public_netmask_bits);
2160 /* releaseip timeouts are converted to success, so to
2161 * detect failures just check if the IP address is
2164 if (ctdb_sys_have_ip(&vnn->public_address)) {
2167 " IP address %s not released\n",
2168 ctdb_addr_to_str(&vnn->public_address)));
2169 vnn->update_in_flight = false;
2173 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
2175 vnn->update_in_flight = false;
2180 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2185 get list of public IPs
/*
 * Control handler: return this node's public IP list in outdata.
 *
 * A zeroed struct ctdb_public_ip_list_old, sized from a count taken
 * over the vnn list, is allocated on outdata.  Each vnn contributes its
 * pnn and address.  When the request carries
 * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, vnns for which
 * ctdb_vnn_available() is false are skipped.  The reported dsize is
 * recomputed from the number of entries actually filled in.
 */
2187 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2188 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2191 struct ctdb_public_ip_list_old *ips;
2192 struct ctdb_vnn *vnn;
2193 bool only_available = false;
2195 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2196 only_available = true;
2199 /* count how many public ip structures we have */
2201 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2205 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2206 num*sizeof(struct ctdb_public_ip);
2207 ips = talloc_zero_size(outdata, len);
2208 CTDB_NO_MEMORY(ctdb, ips);
2211 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2212 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2215 ips->ips[i].pnn = vnn->pnn;
2216 ips->ips[i].addr = vnn->public_address;
2220 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2221 i*sizeof(struct ctdb_public_ip);
2223 outdata->dsize = len;
2224 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: describe one public address and its candidate
 * interfaces.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr.  If it is not a known
 * public address an error is logged.  Otherwise a zeroed struct
 * ctdb_public_ip_info_old is allocated on outdata and filled with the
 * address, the hosting pnn and, for each name in vnn->ifaces, the
 * interface name (explicitly NUL-terminated after strncpy()), link
 * state and reference count.  active_idx starts as 0xFFFFFFFF and is
 * set to the index of the interface currently assigned to the vnn.
 */
2230 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2231 struct ctdb_req_control_old *c,
2236 ctdb_sock_addr *addr;
2237 struct ctdb_public_ip_info_old *info;
2238 struct ctdb_vnn *vnn;
2240 addr = (ctdb_sock_addr *)indata.dptr;
2242 vnn = find_public_ip_vnn(ctdb, addr);
2244 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2245 "'%s'not a public address\n",
2246 ctdb_addr_to_str(addr)));
2250 /* count how many public ip structures we have */
2252 for (;vnn->ifaces[num];) {
2256 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2257 num*sizeof(struct ctdb_iface);
2258 info = talloc_zero_size(outdata, len);
2259 CTDB_NO_MEMORY(ctdb, info);
2261 info->ip.addr = vnn->public_address;
2262 info->ip.pnn = vnn->pnn;
2263 info->active_idx = 0xFFFFFFFF;
2265 for (i=0; vnn->ifaces[i]; i++) {
2266 struct ctdb_interface *cur;
2268 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2270 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2274 if (vnn->iface == cur) {
2275 info->active_idx = i;
2277 strncpy(info->ifaces[i].name, cur->name,
2278 sizeof(info->ifaces[i].name));
2279 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2280 info->ifaces[i].link_state = cur->link_up;
2281 info->ifaces[i].references = cur->references;
2284 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2285 i*sizeof(struct ctdb_iface);
2287 outdata->dsize = len;
2288 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return the list of interfaces known to this node.
 *
 * A zeroed struct ctdb_iface_list_old is allocated on outdata and one
 * entry per element of ctdb->ifaces is filled with the name (explicitly
 * NUL-terminated after strncpy()), link state and reference count.
 */
2293 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2294 struct ctdb_req_control_old *c,
2298 struct ctdb_iface_list_old *ifaces;
2299 struct ctdb_interface *cur;
2301 /* count how many public ip structures we have */
2303 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2307 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2308 num*sizeof(struct ctdb_iface);
2309 ifaces = talloc_zero_size(outdata, len);
2310 CTDB_NO_MEMORY(ctdb, ifaces);
2313 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2314 strncpy(ifaces->ifaces[i].name, cur->name,
2315 sizeof(ifaces->ifaces[i].name));
2316 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2317 ifaces->ifaces[i].link_state = cur->link_up;
2318 ifaces->ifaces[i].references = cur->references;
2322 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2323 i*sizeof(struct ctdb_iface);
2325 outdata->dsize = len;
2326 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 *
 * indata.dptr is interpreted as a struct ctdb_iface.  The request is
 * validated before use: name[CTDB_IFACE_SIZE] must be NUL, link_state
 * must be one of the values accepted by the switch, and references must
 * be 0; each violation is logged.  If the interface is known and its
 * state differs from the requested one, the change is logged and
 * iface->link_up is updated.
 */
2331 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2332 struct ctdb_req_control_old *c,
2335 struct ctdb_iface *info;
2336 struct ctdb_interface *iface;
2337 bool link_up = false;
2339 info = (struct ctdb_iface *)indata.dptr;
2341 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2342 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2343 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2344 len, len, info->name));
2348 switch (info->link_state) {
2356 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2357 (unsigned int)info->link_state));
2361 if (info->references != 0) {
2362 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2363 (unsigned int)info->references));
2367 iface = ctdb_find_iface(ctdb, info->name);
2368 if (iface == NULL) {
2372 if (link_up == iface->link_up) {
2377 ("iface[%s] has changed it's link status %s => %s\n",
2379 iface->link_up?"up":"down",
2380 link_up?"up":"down"));
2382 iface->link_up = link_up;
2388 called by a daemon to inform us of the entire list of TCP tickles for
2389 a particular public address.
2390 this control should only be sent by the node that is currently serving
2391 that public address.
/*
 * Control handler: replace the tickle list held for a public address.
 *
 * indata must be large enough for the fixed part of struct
 * ctdb_tickle_list_old and for list->num connections.  An update for an
 * unknown address is logged as not settable, and an update for an
 * address this node hosts itself is logged as ignored.  Otherwise any
 * existing tcp_array is freed and a new one is built as a copy of the
 * received connections.
 */
2393 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2395 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2396 struct ctdb_tcp_array *tcparray;
2397 struct ctdb_vnn *vnn;
2399 /* We must at least have tickles.num or else we cant verify the size
2400 of the received data blob
2402 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2403 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2407 /* verify that the size of data matches what we expect */
2408 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2409 + sizeof(struct ctdb_connection) * list->num) {
2410 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2414 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2415 ctdb_addr_to_str(&list->addr)));
2417 vnn = find_public_ip_vnn(ctdb, &list->addr);
2419 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2420 ctdb_addr_to_str(&list->addr)));
2425 if (vnn->pnn == ctdb->pnn) {
2427 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2428 ctdb_addr_to_str(&list->addr)));
2432 /* remove any old ticklelist we might have */
2433 talloc_free(vnn->tcp_array);
2434 vnn->tcp_array = NULL;
2436 tcparray = talloc(vnn, struct ctdb_tcp_array);
2437 CTDB_NO_MEMORY(ctdb, tcparray);
2439 tcparray->num = list->num;
2441 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2442 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2444 memcpy(tcparray->connections, &list->connections[0],
2445 sizeof(struct ctdb_connection)*tcparray->num);
2447 /* We now have a new fresh tickle list array for this vnn */
2448 vnn->tcp_array = tcparray;
2454 called to return the full list of tickles for the public address associated
2455 with the provided vnn
/*
 * Control handler: return the tickles recorded for a public address.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr; an address that is
 * not a public address is logged as an error.  The port taken from that
 * address acts as a filter: the visible code either counts every
 * connection or only those whose destination port matches (the
 * condition that selects between the two is on a line not shown here).
 * The reply, a struct ctdb_tickle_list_old sized for the selected
 * connections, is allocated on outdata.
 */
2457 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2459 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2460 struct ctdb_tickle_list_old *list;
2461 struct ctdb_tcp_array *tcparray;
2463 struct ctdb_vnn *vnn;
2466 vnn = find_public_ip_vnn(ctdb, addr);
2468 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2469 ctdb_addr_to_str(addr)));
2474 port = ctdb_addr_to_port(addr);
2476 tcparray = vnn->tcp_array;
2478 if (tcparray != NULL) {
2480 /* All connections */
2481 num = tcparray->num;
2483 /* Count connections for port */
2484 for (i = 0; i < tcparray->num; i++) {
2485 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2492 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2493 + sizeof(struct ctdb_connection) * num;
2495 outdata->dptr = talloc_size(outdata, outdata->dsize);
2496 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2497 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2507 for (i = 0; i < tcparray->num; i++) {
2509 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2510 list->connections[num] = tcparray->connections[i];
2520 set the list of all tcp tickles for a public address
/*
 * Broadcast the complete tickle list for one public address.
 *
 * Builds a struct ctdb_tickle_list_old (allocated on ctdb) holding a
 * copy of tcparray's connections and sends it to CTDB_BROADCAST_ALL as
 * CTDB_CONTROL_SET_TCP_TICKLE_LIST with no reply requested.  A failed
 * send is logged.  The temporary buffer is freed at the end.
 *
 * NOTE(review): if the failure branch returns before reaching
 * talloc_free(), the buffer stays attached to ctdb - confirm against
 * the lines not shown here.
 */
2522 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2523 ctdb_sock_addr *addr,
2524 struct ctdb_tcp_array *tcparray)
2528 struct ctdb_tickle_list_old *list;
2531 num = tcparray->num;
2536 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2537 sizeof(struct ctdb_connection) * num;
2538 data.dptr = talloc_size(ctdb, data.dsize);
2539 CTDB_NO_MEMORY(ctdb, data.dptr);
2541 list = (struct ctdb_tickle_list_old *)data.dptr;
2545 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2548 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2549 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2550 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2552 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2556 talloc_free(data.dptr);
2563 perform tickle updates if required
/*
 * Timer callback that pushes pending tickle updates to the cluster.
 *
 * private_data is the ctdb context.  For each vnn hosted by this node
 * that has tcp_update_needed set, the tickle list is sent with
 * ctdb_send_set_tcp_tickles_for_ip().  A failure is logged; the flag is
 * cleared alongside the "Sent tickle update" message.  The timer is
 * then re-armed on tickle_update_context for tickle_update_interval
 * seconds.
 */
2565 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2566 struct tevent_timer *te,
2567 struct timeval t, void *private_data)
2569 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2571 struct ctdb_vnn *vnn;
2573 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2574 /* we only send out updates for public addresses that
2577 if (ctdb->pnn != vnn->pnn) {
2580 /* We only send out the updates if we need to */
2581 if (!vnn->tcp_update_needed) {
2584 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2585 &vnn->public_address,
2588 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2589 ctdb_addr_to_str(&vnn->public_address)));
2592 ("Sent tickle update for public address %s\n",
2593 ctdb_addr_to_str(&vnn->public_address)));
2594 vnn->tcp_update_needed = false;
2598 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2599 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2600 ctdb_update_tcp_tickles, ctdb);
2604 start periodic update of tcp tickles
/*
 * Create ctdb->tickle_update_context and schedule the first run of
 * ctdb_update_tcp_tickles() after tickle_update_interval seconds.
 */
2606 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2608 ctdb->tickle_update_context = talloc_new(ctdb);
2610 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2611 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2612 ctdb_update_tcp_tickles, ctdb);
/*
 * State for a series of gratuitous ARPs sent for one address.  Only
 * part of the definition is shown here; send_gratious_arp() also uses
 * iface and count members.
 */
2618 struct control_gratious_arp {
2619 struct ctdb_context *ctdb;
2620 ctdb_sock_addr addr;
2626 send a control_gratuitous arp
/*
 * Timer callback: send one gratuitous ARP for arp->addr on arp->iface.
 *
 * private_data is a struct control_gratious_arp.  A send failure is
 * logged with strerror(errno).  The callback re-arms itself on the arp
 * state with a delay of CTDB_ARP_INTERVAL seconds, and arp->count is
 * compared against CTDB_ARP_REPEAT to bound the repetitions.
 */
2628 static void send_gratious_arp(struct tevent_context *ev,
2629 struct tevent_timer *te,
2630 struct timeval t, void *private_data)
2633 struct control_gratious_arp *arp = talloc_get_type(private_data,
2634 struct control_gratious_arp);
2636 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2638 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2639 arp->iface, strerror(errno)));
2644 if (arp->count == CTDB_ARP_REPEAT) {
2649 tevent_add_timer(arp->ctdb->ev, arp,
2650 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2651 send_gratious_arp, arp);
/*
 * Control handler: start sending gratuitous ARPs for an address.
 *
 * indata is validated against the fixed part of struct
 * ctdb_addr_info_old plus the interface name length it declares.  A
 * control_gratious_arp is then allocated on ctdb with a copy of the
 * address and of the interface name, and send_gratious_arp() is
 * scheduled with a zero timeout.
 */
2658 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2660 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2661 struct control_gratious_arp *arp;
2663 /* verify the size of indata */
2664 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2665 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2666 (unsigned)indata.dsize,
2667 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2671 ( offsetof(struct ctdb_addr_info_old, iface)
2672 + gratious_arp->len ) ){
2674 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2675 "but should be %u bytes\n",
2676 (unsigned)indata.dsize,
2677 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2682 arp = talloc(ctdb, struct control_gratious_arp);
2683 CTDB_NO_MEMORY(ctdb, arp);
2686 arp->addr = gratious_arp->addr;
2687 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2688 CTDB_NO_MEMORY(ctdb, arp->iface);
2691 tevent_add_timer(arp->ctdb->ev, arp,
2692 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address at run time.
 *
 * After size validation of indata (fixed part of struct
 * ctdb_addr_info_old plus the declared interface name length), the
 * address is logged and handed to ctdb_add_public_address() together
 * with its mask and interface string; a failure there is logged.
 */
2697 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2699 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2702 /* verify the size of indata */
2703 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2704 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2708 ( offsetof(struct ctdb_addr_info_old, iface)
2711 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2712 "but should be %u bytes\n",
2713 (unsigned)indata.dsize,
2714 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2718 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2720 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2723 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * Control handler: delete a public address at run time.
 *
 * After size validation of indata, the vnn list is searched for the
 * address.  If this node currently hosts it, deletion is deferred by
 * setting delete_pending; otherwise do_delete_ip() is called right
 * away.  An address that is not found is logged as unknown.
 */
2730 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2732 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2733 struct ctdb_vnn *vnn;
2735 /* verify the size of indata */
2736 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2737 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2741 ( offsetof(struct ctdb_addr_info_old, iface)
2744 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2745 "but should be %u bytes\n",
2746 (unsigned)indata.dsize,
2747 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2751 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2753 /* walk over all public addresses until we find a match */
2754 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2755 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2756 if (vnn->pnn == ctdb->pnn) {
2757 /* This IP is currently being hosted.
2758 * Defer the deletion until the next
2759 * takeover run. "ctdb reloadips" will
2760 * always cause a takeover run. "ctdb
2761 * delip" will now need an explicit
2762 * "ctdb ipreallocated" afterwards. */
2763 vnn->delete_pending = true;
2765 /* This IP is not hosted on the
2766 * current node so just delete it
2768 do_delete_ip(ctdb, vnn);
2775 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2776 ctdb_addr_to_str(&pub->addr)));
/*
 * State passed to ctdb_ipreallocated_callback(): it carries the control
 * request that is answered once the "ipreallocated" event has finished.
 */
2781 struct ipreallocated_callback_state {
/* request to reply to; ctdb_control_ipreallocated() talloc_steal()s it onto this state */
2782 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script.
 *
 * p is the struct ipreallocated_callback_state that
 * ctdb_control_ipreallocated() registered together with this callback.
 * The event status is passed back to the requester through
 * ctdb_request_control_reply(); a status of -ETIME additionally makes this
 * node ban itself via ctdb_ban_self().
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (e.g. the condition guarding the failure log and
 * whatever happens to state after the reply) -- confirm against the
 * complete file.
 */
2785 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2786 int status, void *p)
2788 struct ipreallocated_callback_state *state =
2789 talloc_get_type(p, struct ipreallocated_callback_state);
2793 (" \"ipreallocated\" event script failed (status %d)\n",
/* a timed-out event script leads to this node banning itself */
2795 if (status == -ETIME) {
2796 ctdb_ban_self(ctdb);
/* answer the stored control request with the event's status */
2800 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2804 /* A control to run the ipreallocated event */
/*
 * Allocates a struct ipreallocated_callback_state on ctdb, starts the
 * CTDB_EVENT_IPREALLOCATED event script with ctdb_ipreallocated_callback()
 * as its completion callback, then takes ownership of the request c and
 * sets *async_reply so that the reply is sent from the callback.
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (remaining parameters, the check on ret, cleanup and
 * return statements) -- confirm the failure path against the complete file.
 */
2805 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2806 struct ctdb_req_control_old *c,
2810 struct ipreallocated_callback_state *state;
2812 state = talloc(ctdb, struct ipreallocated_callback_state);
2813 CTDB_NO_MEMORY(ctdb, state);
2815 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
/* state serves both as the second argument and as the callback's private data */
2817 ret = ctdb_event_script_callback(ctdb, state,
2818 ctdb_ipreallocated_callback, state,
2819 CTDB_EVENT_IPREALLOCATED,
2823 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2828 /* tell the control that we will reply asynchronously */
2829 state->c = talloc_steal(state, c);
2830 *async_reply = true;
/*
 * Bookkeeping for one in-flight "reload public IPs" request, created by
 * ctdb_control_reload_public_ips().
 *
 * NOTE(review): other members used elsewhere in this file (h->status,
 * h->fd[] and h->child) are declared on lines missing from this listing.
 */
2836 struct ctdb_reloadips_handle {
/* daemon context; the destructor uses it to reach ctdb->reload_ips */
2837 struct ctdb_context *ctdb;
/* control request answered through ctdb_request_control_reply() in the destructor */
2838 struct ctdb_req_control_old *c;
/* fd event registered on the read end of the child's pipe (h->fd[0]) */
2842 struct tevent_fd *fde;
/*
 * talloc destructor for a struct ctdb_reloadips_handle (installed by
 * ctdb_control_reload_public_ips()).
 *
 * Clears ctdb->reload_ips if it still points at this handle, replies to the
 * stored control request with h->status, and sends SIGKILL to the child
 * process through ctdb_kill().
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (any guard around the reply and the return statement)
 * -- confirm against the complete file.
 */
2845 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
/* only clear the daemon-wide pointer when it refers to this very handle */
2847 if (h == h->ctdb->reload_ips) {
2848 h->ctdb->reload_ips = NULL;
2851 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2854 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback registered by ctdb_control_reload_public_ips() with a
 * 120 second timeout; private_data is the struct ctdb_reloadips_handle.
 *
 * NOTE(review): what is done with h is on lines missing from this listing
 * -- confirm against the complete file.
 */
2858 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2859 struct tevent_timer *te,
2860 struct timeval t, void *private_data)
2862 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event callback for the read end of the reloadips pipe; private_data is
 * the struct ctdb_reloadips_handle.
 *
 * Reads the single status byte written by the child.  A short read or a
 * non-zero byte is logged as a child failure.
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (local declarations and everything after the error
 * log, including how h->status is updated and h released) -- confirm
 * against the complete file.
 */
2867 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2868 struct tevent_fd *fde,
2869 uint16_t flags, void *private_data)
2871 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/* the child reports its result as exactly one byte; 0 means success */
2876 ret = sys_read(h->fd[0], &res, 1);
2877 if (ret < 1 || res != 0) {
2878 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Body of the reloadips child process: bring the local node's public IP
 * list in line with the public addresses file.
 *
 * Steps visible in this listing:
 *  1. fetch the public IPs currently known to the local node (ips);
 *  2. re-read the public addresses file via ctdb_set_public_addresses();
 *     afterwards ctdb->vnn is treated as the file's view;
 *  3. for every entry of ips without a match in ctdb->vnn, queue a
 *     CTDB_CONTROL_DEL_PUBLIC_IP control to CTDB_CURRENT_NODE;
 *  4. for every ctdb->vnn entry without a match in ips, queue a
 *     CTDB_CONTROL_ADD_PUBLIC_IP control to CTDB_CURRENT_NODE;
 *  5. call ctdb_client_async_wait() on the queued controls.
 *
 * Most temporary allocations hang off mem_ctx, which is freed on the exit
 * paths shown here.  The caller writes this function's result to its
 * parent as a one-byte status.
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (several declarations, loop exits, error checks and
 * every return statement) -- confirm details against the complete file.
 */
2886 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2888 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2889 struct ctdb_public_ip_list_old *ips;
2890 struct ctdb_vnn *vnn;
2891 struct client_async_data *async_data;
2892 struct timeval timeout;
2894 struct ctdb_client_control_state *state;
2898 CTDB_NO_MEMORY(ctdb, mem_ctx);
2900 /* Read IPs from local node */
2901 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2902 CTDB_CURRENT_NODE, mem_ctx, &ips);
2905 ("Unable to fetch public IPs from local node\n"));
2906 talloc_free(mem_ctx);
2910 /* Read IPs file - this is safe since this is a child process */
2912 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2913 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2914 talloc_free(mem_ctx);
/* collects every control queued below so they can be waited for together */
2918 async_data = talloc_zero(mem_ctx, struct client_async_data);
2919 CTDB_NO_MEMORY(ctdb, async_data);
2921 /* Compare IPs between node and file for IPs to be deleted */
2922 for (i = 0; i < ips->num; i++) {
2924 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2925 if (ctdb_same_ip(&vnn->public_address,
2926 &ips->ips[i].addr)) {
2927 /* IP is still in file */
2933 /* Delete IP ips->ips[i] */
2934 struct ctdb_addr_info_old *pub;
2937 ("IP %s no longer configured, deleting it\n",
2938 ctdb_addr_to_str(&ips->ips[i].addr)));
/* zeroed request; only the address is filled in on the lines shown here */
2940 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2941 CTDB_NO_MEMORY(ctdb, pub);
2943 pub->addr = ips->ips[i].addr;
2947 timeout = TAKEOVER_TIMEOUT();
2949 data.dsize = offsetof(struct ctdb_addr_info_old,
2951 data.dptr = (uint8_t *)pub;
2953 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2954 CTDB_CONTROL_DEL_PUBLIC_IP,
2955 0, data, async_data,
2957 if (state == NULL) {
2960 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2964 ctdb_client_async_add(async_data, state);
2968 /* Compare IPs between node and file for IPs to be added */
2970 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2971 for (i = 0; i < ips->num; i++) {
2972 if (ctdb_same_ip(&vnn->public_address,
2973 &ips->ips[i].addr)) {
2974 /* IP already on node */
2978 if (i == ips->num) {
2979 /* Add IP vnn->public_address: no ips->ips[] entry matched it */
2980 struct ctdb_addr_info_old *pub;
2981 const char *ifaces = NULL;
2986 ("New IP %s configured, adding it\n",
2987 ctdb_addr_to_str(&vnn->public_address)));
/* broadcast this node's pnn with CTDB_SRVID_REBALANCE_NODE to request a rebalance */
2989 uint32_t pnn = ctdb_get_pnn(ctdb);
2991 data.dsize = sizeof(pnn);
2992 data.dptr = (uint8_t *)&pnn;
2994 ret = ctdb_client_send_message(
2996 CTDB_BROADCAST_CONNECTED,
2997 CTDB_SRVID_REBALANCE_NODE,
3000 DEBUG(DEBUG_WARNING,
3001 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/*
 * Build a comma-separated interface list from vnn->ifaces[].
 * NOTE(review): the joined string is allocated on vnn rather than mem_ctx,
 * and no NULL check of the talloc_asprintf() result is visible before
 * strlen() is applied to it -- confirm against the complete file.
 */
3007 ifaces = vnn->ifaces[0];
3009 while (vnn->ifaces[iface] != NULL) {
3010 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3011 vnn->ifaces[iface]);
/* the request is the fixed header followed by the NUL-terminated interface list */
3015 len = strlen(ifaces) + 1;
3016 pub = talloc_zero_size(mem_ctx,
3017 offsetof(struct ctdb_addr_info_old, iface) + len);
3018 CTDB_NO_MEMORY(ctdb, pub);
3020 pub->addr = vnn->public_address;
3021 pub->mask = vnn->public_netmask_bits;
3023 memcpy(&pub->iface[0], ifaces, pub->len);
3025 timeout = TAKEOVER_TIMEOUT();
3027 data.dsize = offsetof(struct ctdb_addr_info_old,
3029 data.dptr = (uint8_t *)pub;
3031 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3032 CTDB_CONTROL_ADD_PUBLIC_IP,
3033 0, data, async_data,
3035 if (state == NULL) {
3038 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3042 ctdb_client_async_add(async_data, state);
/* presumably blocks until every queued add/delete control has completed */
3046 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3047 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3051 talloc_free(mem_ctx);
3055 talloc_free(mem_ctx);
3059 /* This control is sent to force the node to re-read the public addresses file
3060 and drop any addresses we should no longer host, and add new addresses
3061 that we are now able to host
/*
 * Control handler: reload the public addresses file in a forked child.
 *
 * Any handle still recorded in ctdb->reload_ips is freed first.  A new
 * struct ctdb_reloadips_handle is allocated on ctdb, a pipe is created and
 * a child is forked.  The child switches to client mode, runs
 * ctdb_reloadips_child() and writes a one-byte result to the pipe.  The
 * parent takes ownership of the request c, watches the read end of the
 * pipe with ctdb_reloadips_child_handler(), arms a 120 second timer and
 * sets *async_reply so that the reply is sent later.
 *
 * NOTE(review): the embedded line numbering shows that lines are missing
 * from this listing (initialisation of h, close() calls, cleanup on the
 * error paths and the return statements).  No visible line stores h in
 * ctdb->reload_ips -- confirm against the complete file where, or whether,
 * that happens.
 */
3063 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3065 struct ctdb_reloadips_handle *h;
/* captured before forking so that the child can refer to the daemon's pid */
3066 pid_t parent = getpid();
/* discard any reload already recorded in ctdb->reload_ips */
3068 if (ctdb->reload_ips != NULL) {
3069 talloc_free(ctdb->reload_ips);
3070 ctdb->reload_ips = NULL;
3073 h = talloc(ctdb, struct ctdb_reloadips_handle);
3074 CTDB_NO_MEMORY(ctdb, h);
/*
 * NOTE(review): the log text below names ctdb_freeze_lock although this
 * pipe belongs to reloadips; it looks copy-pasted.  The runtime string is
 * left unchanged here.
 */
3079 if (pipe(h->fd) == -1) {
3080 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3085 h->child = ctdb_fork(ctdb);
3086 if (h->child == (pid_t)-1) {
3087 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* child side, assuming ctdb_fork() follows the fork() return convention */
3095 if (h->child == 0) {
3096 signed char res = 0;
3099 debug_extra = talloc_asprintf(NULL, "reloadips:");
3101 prctl_set_comment("ctdb_reloadips");
3102 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3103 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3106 res = ctdb_reloadips_child(ctdb);
3108 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* report the one-byte result through the pipe; the sys_write() result is not checked */
3112 sys_write(h->fd[1], &res, 1);
3113 ctdb_wait_for_process_to_exit(parent);
/* parent side: own the request so that the destructor can answer it later */
3117 h->c = talloc_steal(h, c);
3120 set_close_on_exec(h->fd[0]);
3122 talloc_set_destructor(h, ctdb_reloadips_destructor);
/*
 * Watch the read end of the pipe for the child's result.
 * NOTE(review): h->fde is passed to tevent_fd_set_auto_close() on the very
 * next line without a NULL check.
 */
3125 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3126 ctdb_reloadips_child_handler, (void *)h);
3127 tevent_fd_set_auto_close(h->fde);
/* arm a 120 second timer that invokes ctdb_reloadips_timeout_event() */
3129 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3130 ctdb_reloadips_timeout_event, h);
3132 /* we reply later */
3133 *async_reply = true;