4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/* Timeout value built from the takeover_timeout tunable via
 * timeval_current_ofs(). The expansion reads a variable named 'ctdb',
 * so one must be in scope wherever the macro is used. */
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument given to timeval_current_ofs() when
 * ctdb_control_send_arp() re-arms its timer. */
48 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() compares arp->count against this value. */
49 #define CTDB_ARP_REPEAT 3
/* A network interface known to this node. Instances are chained through
 * prev/next on the ctdb->ifaces list. */
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 /* state associated with a public ip address */
/* list linkage; VNNs are chained on ctdb->vnn */
60 struct ctdb_vnn *prev, *next;
/* interface currently assigned to this address; may be NULL */
62 struct ctdb_interface *iface;
64 ctdb_sock_addr public_address;
65 uint8_t public_netmask_bits;
67 /* the node number that is serving this public address, if any.
68 If no node serves this ip it is set to -1 */
71 /* List of clients to tickle for this public address */
72 struct ctdb_tcp_array *tcp_array;
74 /* whether we need to update the other nodes with changes to our list
75 of connected clients */
76 bool tcp_update_needed;
78 /* a context to hang sending gratuitous arp events off */
79 TALLOC_CTX *takeover_ctx;
81 /* Set to true any time an update to this VNN is in flight.
82 This helps to avoid races. */
83 bool update_in_flight;
85 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86 * address then this flag is set. It will be deleted in the
87 * release IP callback. */
/* Name of the interface currently assigned to 'vnn'. Used throughout this
 * file for log messages and as an event script argument. */
91 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
94 return vnn->iface->name;
/*
 * Register the interface named 'iface' on the ctdb->ifaces list.
 * A name longer than CTDB_IFACE_SIZE is logged as an error. The list is
 * searched for an existing entry with the same name before a new
 * structure is allocated.
 */
100 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
102 struct ctdb_interface *i;
104 if (strlen(iface) > CTDB_IFACE_SIZE) {
105 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
109 /* Verify that we don't have an entry for this interface yet */
110 for (i=ctdb->ifaces;i;i=i->next) {
111 if (strcmp(i->name, iface) == 0) {
116 /* create a new structure for this interface */
117 i = talloc_zero(ctdb, struct ctdb_interface);
118 CTDB_NO_MEMORY_FATAL(ctdb, i);
/* the name copy is allocated as a talloc child of the new structure */
119 i->name = talloc_strdup(i, iface);
120 CTDB_NO_MEMORY(ctdb, i->name);
124 DLIST_ADD(ctdb->ifaces, i);
/* Scan the NULL-terminated vnn->ifaces array for an entry whose string
 * equals 'name'. */
129 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
134 for (n = 0; vnn->ifaces[n] != NULL; n++) {
135 if (strcmp(name, vnn->ifaces[n]) == 0) {
143 /* If any interfaces now have no possible IPs then delete them. This
144 * implementation is naive (i.e. simple) rather than clever
145 * (i.e. complex). Given that this is run on delip and that operation
146 * is rare, this doesn't need to be efficient - it needs to be
147 * foolproof. One alternative is reference counting, where the logic
148 * is distributed and can, therefore, be broken in multiple places.
149 * Another alternative is to build a red-black tree of interfaces that
150 * can have addresses (by walking ctdb->vnn once) and then walking
151 * ctdb->ifaces once and deleting those not in the tree. Let's go to
152 * one of those if the naive implementation causes problems... :-)
154 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
155 struct ctdb_vnn *vnn)
157 struct ctdb_interface *i, *next;
159 /* For each interface, check if there's an IP using it. */
/* The loop steps through 'next' rather than i->next because 'i' may be
 * unlinked from ctdb->ifaces at the bottom of the body. */
160 for (i = ctdb->ifaces; i != NULL; i = next) {
165 /* Only consider interfaces named in the given VNN. */
166 if (!vnn_has_interface_with_name(vnn, i->name)) {
170 /* Search for a vnn with this interface. */
/* This walks the VNNs still on ctdb->vnn, so the result depends on
 * whether the caller has already unlinked 'vnn' from that list. */
172 for (tv=ctdb->vnn; tv; tv=tv->next) {
173 if (vnn_has_interface_with_name(tv, i->name)) {
180 /* None of the VNNs are using this interface. */
181 DLIST_REMOVE(ctdb->ifaces, i);
/* Look up the entry on ctdb->ifaces whose name equals 'iface'. */
188 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
191 struct ctdb_interface *i;
193 for (i=ctdb->ifaces;i;i=i->next) {
194 if (strcmp(i->name, iface) == 0) {
/*
 * Choose an interface for 'vnn' from the names listed in vnn->ifaces,
 * resolving each name through ctdb_find_iface(). Candidates are compared
 * on their 'references' count; presumably the one with the fewest
 * references wins -- confirm against the full body.
 */
202 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
203 struct ctdb_vnn *vnn)
206 struct ctdb_interface *cur = NULL;
207 struct ctdb_interface *best = NULL;
209 for (i=0; vnn->ifaces[i]; i++) {
211 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
225 if (cur->references < best->references) {
/*
 * Bind 'vnn' to an interface chosen by ctdb_vnn_best_iface() and record
 * this node (ctdb->pnn) as the one serving the address. Logs when the
 * address is still assigned to an interface, when no interface can be
 * found, and when the new assignment has been made.
 */
234 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 struct ctdb_interface *best = NULL;
240 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241 "still assigned to iface '%s'\n",
242 ctdb_addr_to_str(&vnn->public_address),
243 ctdb_vnn_iface_string(vnn)));
247 best = ctdb_vnn_best_iface(ctdb, vnn);
249 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
250 "cannot assign to iface any iface\n",
251 ctdb_addr_to_str(&vnn->public_address)));
257 vnn->pnn = ctdb->pnn;
259 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
260 "now assigned to iface '%s' refs[%d]\n",
261 ctdb_addr_to_str(&vnn->public_address),
262 ctdb_vnn_iface_string(vnn),
/*
 * Detach 'vnn' from its interface, dropping one reference on that
 * interface. The old interface name and its reference count are logged
 * first; the log line copes with vnn->iface being NULL.
 */
267 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
268 struct ctdb_vnn *vnn)
270 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
271 "now unassigned (old iface '%s' refs[%d])\n",
272 ctdb_addr_to_str(&vnn->public_address),
273 ctdb_vnn_iface_string(vnn),
274 vnn->iface?vnn->iface->references:0));
276 vnn->iface->references--;
279 if (vnn->pnn == ctdb->pnn) {
/*
 * Decide whether this node can host 'vnn'. The checks run in this order:
 * the daemon run state, a pending delete on the VNN, the link state of the
 * currently assigned interface, and then each interface named in
 * vnn->ifaces.
 */
284 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
285 struct ctdb_vnn *vnn)
289 /* Nodes that are not RUNNING can not host IPs */
290 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
294 if (vnn->delete_pending) {
298 if (vnn->iface && vnn->iface->link_up) {
302 for (i=0; vnn->ifaces[i]; i++) {
303 struct ctdb_interface *cur;
305 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* Per-announcement state handed to ctdb_control_send_arp() as its timer
 * private data. */
318 struct ctdb_takeover_arp {
319 struct ctdb_context *ctdb;
/* connections to tickle; taken over from the VNN's tcp_array */
322 struct ctdb_tcp_array *tcparray;
323 struct ctdb_vnn *vnn;
328 lists of tcp endpoints
/* doubly-linked list node wrapping a single ctdb_connection */
330 struct ctdb_tcp_list {
331 struct ctdb_tcp_list *prev, *next;
332 struct ctdb_connection connection;
336 list of clients to kill on IP release
/* doubly-linked list node carrying a back-pointer to the ctdb context */
338 struct ctdb_client_ip {
339 struct ctdb_client_ip *prev, *next;
340 struct ctdb_context *ctdb;
347 send a gratuitous arp
/*
 * Timer handler. Sends an ARP for arp->addr on the VNN's interface, then
 * sends a TCP tickle for every connection held in arp->tcparray. Failures
 * of either send are logged. arp->count is compared against
 * CTDB_ARP_REPEAT, and the handler re-arms itself on the VNN's
 * takeover_ctx.
 */
349 static void ctdb_control_send_arp(struct tevent_context *ev,
350 struct tevent_timer *te,
351 struct timeval t, void *private_data)
353 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
354 struct ctdb_takeover_arp);
356 struct ctdb_tcp_array *tcparray;
357 const char *iface = ctdb_vnn_iface_string(arp->vnn);
359 ret = ctdb_sys_send_arp(&arp->addr, iface);
361 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
362 iface, strerror(errno)));
365 tcparray = arp->tcparray;
367 for (i=0;i<tcparray->num;i++) {
368 struct ctdb_connection *tcon;
370 tcon = &tcparray->connections[i];
371 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
372 (unsigned)ntohs(tcon->dst.ip.sin_port),
373 ctdb_addr_to_str(&tcon->src),
374 (unsigned)ntohs(tcon->src.ip.sin_port)));
375 ret = ctdb_sys_send_tcp(
380 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
381 ctdb_addr_to_str(&tcon->src)));
388 if (arp->count == CTDB_ARP_REPEAT) {
/* The timer is parented to takeover_ctx, so freeing that context (as
 * ctdb_control_release_ip() does) also cancels a pending re-send. */
393 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
394 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
395 ctdb_control_send_arp, arp);
/*
 * Start announcing 'vnn' on its interface. Allocates a ctdb_takeover_arp
 * under vnn->takeover_ctx (creating that context on first use), moves the
 * VNN's tcp_array into it, and schedules ctdb_control_send_arp() with a
 * zero timeout.
 */
398 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
399 struct ctdb_vnn *vnn)
401 struct ctdb_takeover_arp *arp;
402 struct ctdb_tcp_array *tcparray;
404 if (!vnn->takeover_ctx) {
405 vnn->takeover_ctx = talloc_new(vnn);
406 if (!vnn->takeover_ctx) {
411 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
417 arp->addr = vnn->public_address;
420 tcparray = vnn->tcp_array;
422 /* add all of the known tcp connections for this IP to the
423 list of tcp connections to send tickle acks for */
424 arp->tcparray = talloc_steal(arp, tcparray);
/* the array now belongs to 'arp'; the VNN starts over with none */
426 vnn->tcp_array = NULL;
427 vnn->tcp_update_needed = true;
430 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
431 timeval_zero(), ctdb_control_send_arp, arp);
/* State carried through the release-IP event script to
 * release_ip_callback(). */
436 struct takeover_callback_state {
437 struct ctdb_req_control_old *c;
438 ctdb_sock_addr *addr;
439 struct ctdb_vnn *vnn;
/* State carried through the take-IP event script to
 * ctdb_do_takeip_callback(). */
442 struct ctdb_do_takeip_state {
443 struct ctdb_req_control_old *c;
444 struct ctdb_vnn *vnn;
448 called when takeip event finishes
/*
 * Completion handler for the event script started by ctdb_do_takeip().
 * On failure the error is logged and 'status' is returned to the
 * requester. Otherwise the address is announced when
 * ctdb->do_checkpublicip is set, a CTDB_SRVID_TAKE_IP message carrying
 * the address string is sent to this node, and the control is answered
 * with 0.
 */
450 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
453 struct ctdb_do_takeip_state *state =
454 talloc_get_type(private_data, struct ctdb_do_takeip_state);
459 if (status == -ETIME) {
462 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
463 ctdb_addr_to_str(&state->vnn->public_address),
464 ctdb_vnn_iface_string(state->vnn)));
465 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
471 if (ctdb->do_checkpublicip) {
473 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
/* the message payload is the address string including its NUL */
482 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
483 data.dsize = strlen((char *)data.dptr) + 1;
484 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
486 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
489 /* the control succeeded */
490 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for the takeip state: clears the VNN's
 * update_in_flight flag when the state object is freed. */
495 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
497 state->vnn->update_in_flight = false;
502 take over an ip address
/*
 * Start taking over 'vnn' on this node. The request is refused if another
 * update for the VNN is already in flight. Otherwise an interface is
 * assigned and an event script is launched whose completion is handled by
 * ctdb_do_takeip_callback(). update_in_flight is set here and cleared by
 * ctdb_takeip_destructor() when the state object is freed.
 */
504 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
505 struct ctdb_req_control_old *c,
506 struct ctdb_vnn *vnn)
509 struct ctdb_do_takeip_state *state;
511 if (vnn->update_in_flight) {
512 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
513 "update for this IP already in flight\n",
514 ctdb_addr_to_str(&vnn->public_address),
515 vnn->public_netmask_bits));
519 ret = ctdb_vnn_assign_iface(ctdb, vnn);
521 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
522 "assign a usable interface\n",
523 ctdb_addr_to_str(&vnn->public_address),
524 vnn->public_netmask_bits));
/* the state is a talloc child of the VNN */
528 state = talloc(vnn, struct ctdb_do_takeip_state);
529 CTDB_NO_MEMORY(ctdb, state);
/* reparent the request onto ctdb so it can be answered from the callback */
531 state->c = talloc_steal(ctdb, c);
534 vnn->update_in_flight = true;
535 talloc_set_destructor(state, ctdb_takeip_destructor);
537 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
538 ctdb_addr_to_str(&vnn->public_address),
539 vnn->public_netmask_bits,
540 ctdb_vnn_iface_string(vnn)));
542 ret = ctdb_event_script_callback(ctdb,
544 ctdb_do_takeip_callback,
548 ctdb_vnn_iface_string(vnn),
549 ctdb_addr_to_str(&vnn->public_address),
550 vnn->public_netmask_bits);
553 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
554 ctdb_addr_to_str(&vnn->public_address),
555 ctdb_vnn_iface_string(vnn)));
/* State for an in-progress interface move. 'old' is the interface that
 * ctdb_do_updateip_callback() restores if the move fails. */
563 struct ctdb_do_updateip_state {
564 struct ctdb_req_control_old *c;
565 struct ctdb_interface *old;
566 struct ctdb_vnn *vnn;
570 called when updateip event finishes
/*
 * Completion handler for the event script started by ctdb_do_updateip().
 * On failure the VNN is put back on its previous interface (state->old,
 * whose reference count is incremented) and 'status' is returned to the
 * requester. On success the address is announced when
 * ctdb->do_checkpublicip is set and the control is answered with 0.
 */
572 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575 struct ctdb_do_updateip_state *state =
576 talloc_get_type(private_data, struct ctdb_do_updateip_state);
580 if (status == -ETIME) {
583 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
584 ctdb_addr_to_str(&state->vnn->public_address),
586 ctdb_vnn_iface_string(state->vnn)));
589 * All we can do is reset the old interface
590 * and let the next run fix it
592 ctdb_vnn_unassign_iface(ctdb, state->vnn);
593 state->vnn->iface = state->old;
594 state->vnn->iface->references++;
596 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
601 if (ctdb->do_checkpublicip) {
603 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
605 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
612 /* the control succeeded */
613 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for the updateip state: clears the VNN's
 * update_in_flight flag when the state object is freed. */
618 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
620 state->vnn->update_in_flight = false;
625 update (move) an ip address
/*
 * Move 'vnn' to a different interface. The request is refused if an
 * update for the VNN is already in flight. Otherwise the current
 * interface is dropped and a new one is chosen. When the old and new
 * interface names are equal the control is answered with 0 straight
 * away. In all other cases the CTDB_EVENT_UPDATE_IP event script is run,
 * with completion handled by ctdb_do_updateip_callback().
 *
 * NOTE(review): 'old' is captured from vnn->iface on entry and
 * dereferenced later (old->name) -- confirm callers never reach here with
 * vnn->iface == NULL.
 * NOTE(review): the failure log message below spells "assign" as "assin".
 * It is runtime text, so it is left untouched here.
 */
627 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
628 struct ctdb_req_control_old *c,
629 struct ctdb_vnn *vnn)
632 struct ctdb_do_updateip_state *state;
633 struct ctdb_interface *old = vnn->iface;
634 const char *new_name;
636 if (vnn->update_in_flight) {
637 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
638 "update for this IP already in flight\n",
639 ctdb_addr_to_str(&vnn->public_address),
640 vnn->public_netmask_bits));
644 ctdb_vnn_unassign_iface(ctdb, vnn);
645 ret = ctdb_vnn_assign_iface(ctdb, vnn);
647 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
648 "assin a usable interface (old iface '%s')\n",
649 ctdb_addr_to_str(&vnn->public_address),
650 vnn->public_netmask_bits,
655 new_name = ctdb_vnn_iface_string(vnn);
656 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
657 /* A benign update from one interface onto itself.
658 * no need to run the eventscripts in this case, just return
661 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
665 state = talloc(vnn, struct ctdb_do_updateip_state);
666 CTDB_NO_MEMORY(ctdb, state);
668 state->c = talloc_steal(ctdb, c);
672 vnn->update_in_flight = true;
673 talloc_set_destructor(state, ctdb_updateip_destructor);
675 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
676 "interface %s to %s\n",
677 ctdb_addr_to_str(&vnn->public_address),
678 vnn->public_netmask_bits,
682 ret = ctdb_event_script_callback(ctdb,
684 ctdb_do_updateip_callback,
686 CTDB_EVENT_UPDATE_IP,
690 ctdb_addr_to_str(&vnn->public_address),
691 vnn->public_netmask_bits);
693 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
694 ctdb_addr_to_str(&vnn->public_address),
695 old->name, new_name));
704 Find the vnn of the node that has a public ip address
705 returns NULL if the address is not known as a public address
/* Walk ctdb->vnn, comparing each entry's public_address with 'addr' using
 * ctdb_same_ip(). */
707 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
709 struct ctdb_vnn *vnn;
711 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
712 if (ctdb_same_ip(&vnn->public_address, addr)) {
721 take over an ip address
/*
 * Control handler asking this node to take over a public address.
 * A request naming a different PNN is logged as an error, and an address
 * that is not on ctdb->vnn is logged as not being a public address.
 * Whether the kernel already has the address is checked only when IP
 * failover and do_checkpublicip are both enabled. Based on that and on
 * the VNN's current interface and PNN, the work is handed to
 * ctdb_do_takeip() or ctdb_do_updateip(), or the takeover is logged as
 * redundant.
 */
723 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
724 struct ctdb_req_control_old *c,
/* the request payload is interpreted as a struct ctdb_public_ip */
729 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
730 struct ctdb_vnn *vnn;
731 bool have_ip = false;
732 bool do_updateip = false;
733 bool do_takeip = false;
734 struct ctdb_interface *best_iface = NULL;
736 if (pip->pnn != ctdb->pnn) {
737 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
738 "with pnn %d, but we're node %d\n",
739 ctdb_addr_to_str(&pip->addr),
740 pip->pnn, ctdb->pnn));
744 /* update our vnn list */
745 vnn = find_public_ip_vnn(ctdb, &pip->addr);
747 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
748 ctdb_addr_to_str(&pip->addr)));
752 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
753 have_ip = ctdb_sys_have_ip(&pip->addr);
755 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
756 if (best_iface == NULL) {
/* NOTE(review): the two adjacent string literals below join without a
 * space, so the message reads "failed to finda usable interface". */
757 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
758 "a usable interface (old %s, have_ip %d)\n",
759 ctdb_addr_to_str(&vnn->public_address),
760 vnn->public_netmask_bits,
761 ctdb_vnn_iface_string(vnn),
766 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
767 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
772 if (vnn->iface == NULL && have_ip) {
773 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
775 ctdb_addr_to_str(&vnn->public_address)));
779 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
/* NOTE(review): "node %d" and "and we are node %d" are joined without a
 * separating space in the message below. */
780 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
781 "and we have it on iface[%s], but it was assigned to node %d"
782 "and we are node %d, banning ourself\n",
783 ctdb_addr_to_str(&vnn->public_address),
784 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
789 if (vnn->pnn == -1 && have_ip) {
790 vnn->pnn = ctdb->pnn;
791 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
792 "and we already have it on iface[%s], update local daemon\n",
793 ctdb_addr_to_str(&vnn->public_address),
794 ctdb_vnn_iface_string(vnn)));
799 if (vnn->iface != best_iface) {
800 if (!vnn->iface->link_up) {
802 } else if (vnn->iface->references > (best_iface->references + 1)) {
803 /* only move when the rebalance gains something */
811 ctdb_vnn_unassign_iface(ctdb, vnn);
818 ret = ctdb_do_takeip(ctdb, c, vnn);
822 } else if (do_updateip) {
823 ret = ctdb_do_updateip(ctdb, c, vnn);
829 * The interface is up and the kernel known the ip
832 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833 ctdb_addr_to_str(&pip->addr),
834 vnn->public_netmask_bits,
835 ctdb_vnn_iface_string(vnn)));
839 /* tell ctdb_control.c that we will be replying asynchronously */
/* Remove 'vnn' from ctdb->vnn, release its interface, and then drop any
 * interface named by this VNN that no remaining VNN names. */
845 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
847 DLIST_REMOVE(ctdb->vnn, vnn);
848 ctdb_vnn_unassign_iface(ctdb, vnn);
849 ctdb_remove_orphaned_ifaces(ctdb, vnn);
/*
 * Bookkeeping after an address has been released. Sends a
 * CTDB_SRVID_RELEASE_IP message carrying the address string to this node,
 * unassigns the VNN's interface, and completes a pending delete through
 * do_delete_ip(). The VNN pointer the caller should keep is the return
 * value; release_ip_callback() stores it back into its state.
 */
853 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
854 struct ctdb_vnn *vnn,
855 ctdb_sock_addr *addr)
859 /* Send a message to all clients of this node telling them
860 * that the cluster has been reconfigured and they should
861 * close any connections on this IP address
863 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
864 data.dsize = strlen((char *)data.dptr)+1;
865 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
866 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
868 ctdb_vnn_unassign_iface(ctdb, vnn);
870 /* Process the IP if it has been marked for deletion */
871 if (vnn->delete_pending) {
872 do_delete_ip(ctdb, vnn);
880 called when releaseip event finishes
/*
 * Completion handler for the release-IP event script. When IP failover
 * and do_checkpublicip are both enabled and the kernel still has the
 * address, the condition is logged as a failure and a reply is sent.
 * Otherwise release_ip_post() runs and the control is answered with 0.
 */
882 static void release_ip_callback(struct ctdb_context *ctdb, int status,
885 struct takeover_callback_state *state =
886 talloc_get_type(private_data, struct takeover_callback_state);
888 if (status == -ETIME) {
892 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
893 if (ctdb_sys_have_ip(state->addr)) {
895 ("IP %s still hosted during release IP callback, failing\n",
896 ctdb_addr_to_str(state->addr)));
897 ctdb_request_control_reply(ctdb, state->c,
/* keep whatever VNN pointer release_ip_post() hands back; the state
 * destructor checks it for NULL before touching it */
904 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
906 /* the control succeeded */
907 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor for the release-IP state: clears update_in_flight on
 * the VNN, if the state still refers to one. */
911 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
913 if (state->vnn != NULL) {
914 state->vnn->update_in_flight = false;
920 release an ip address
/*
 * Control handler asking this node to release a public address.
 * An address that is not on ctdb->vnn is logged as not being a public
 * address. Any pending ARP/tickle work is cancelled by freeing
 * vnn->takeover_ctx. Releases of addresses this node does not hold are
 * logged as redundant, and a release is refused while another update for
 * the VNN is in flight. Otherwise the CTDB_EVENT_RELEASE_IP event script
 * is started with release_ip_callback() as completion handler, and
 * *async_reply is set.
 *
 * NOTE(review): 'iface' is obtained with libc strdup() rather than talloc.
 * The matching free() and a NULL check are not visible here -- confirm
 * both exist on every path.
 */
922 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
923 struct ctdb_req_control_old *c,
928 struct takeover_callback_state *state;
929 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
930 struct ctdb_vnn *vnn;
933 /* update our vnn list */
934 vnn = find_public_ip_vnn(ctdb, &pip->addr);
936 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
937 ctdb_addr_to_str(&pip->addr)));
942 /* stop any previous arps */
943 talloc_free(vnn->takeover_ctx);
944 vnn->takeover_ctx = NULL;
946 /* Some ctdb tool commands (e.g. moveip) send
947 * lazy multicast to drop an IP from any node that isn't the
948 * intended new node. The following makes ctdbd ignore
949 * a release for any address it doesn't host.
951 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
952 if (!ctdb_sys_have_ip(&pip->addr)) {
953 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
954 ctdb_addr_to_str(&pip->addr),
955 vnn->public_netmask_bits,
956 ctdb_vnn_iface_string(vnn)));
957 ctdb_vnn_unassign_iface(ctdb, vnn);
961 if (vnn->iface == NULL) {
962 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
963 ctdb_addr_to_str(&pip->addr),
964 vnn->public_netmask_bits));
969 /* There is a potential race between take_ip and us because we
970 * update the VNN via a callback that run when the
971 * eventscripts have been run. Avoid the race by allowing one
972 * update to be in flight at a time.
974 if (vnn->update_in_flight) {
975 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
976 "update for this IP already in flight\n",
977 ctdb_addr_to_str(&vnn->public_address),
978 vnn->public_netmask_bits));
982 iface = strdup(ctdb_vnn_iface_string(vnn));
984 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
985 ctdb_addr_to_str(&pip->addr),
986 vnn->public_netmask_bits,
990 state = talloc(ctdb, struct takeover_callback_state);
992 ctdb_set_error(ctdb, "Out of memory at %s:%d",
998 state->c = talloc_steal(state, c);
999 state->addr = talloc(state, ctdb_sock_addr);
1000 if (state->addr == NULL) {
1001 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1002 __FILE__, __LINE__);
1007 *state->addr = pip->addr;
1010 vnn->update_in_flight = true;
1011 talloc_set_destructor(state, ctdb_releaseip_destructor);
1013 ret = ctdb_event_script_callback(ctdb,
1014 state, release_ip_callback, state,
1015 CTDB_EVENT_RELEASE_IP,
1018 ctdb_addr_to_str(&pip->addr),
1019 vnn->public_netmask_bits);
1022 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1023 ctdb_addr_to_str(&pip->addr),
1024 ctdb_vnn_iface_string(vnn)));
1029 /* tell the control that we will be replying asynchronously */
1030 *async_reply = true;
/*
 * Add one public address to ctdb->vnn. Every interface named in the
 * comma-separated 'ifaces' string is checked with
 * ctdb_sys_check_iface_exists(), and an address already on the list is
 * reported as a duplicate. The new VNN receives a NULL-terminated copy of
 * the interface names, each of which is also registered through
 * ctdb_add_local_iface(). With 'check_address' set, an address the kernel
 * already has is recorded as being served by this node.
 *
 * NOTE(review): the strdup() result is passed to strtok() with no NULL
 * check visible, and its free() is not visible either -- confirm.
 * NOTE(review): the initial talloc_array() for vnn->ifaces has no visible
 * NULL check, and its result is written at vnn->ifaces[num] after the loop
 * if 'ifaces' yields no tokens.
 * NOTE(review): strtok() keeps hidden static state; strtok_r() would be
 * the reentrant alternative.
 */
1034 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1035 ctdb_sock_addr *addr,
1036 unsigned mask, const char *ifaces,
1039 struct ctdb_vnn *vnn;
/* first pass: tokenise a scratch copy just to validate the names */
1046 tmp = strdup(ifaces);
1047 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1048 if (!ctdb_sys_check_iface_exists(iface)) {
1049 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1056 /* Verify that we don't have an entry for this ip yet */
1057 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1058 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1059 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1060 ctdb_addr_to_str(addr)));
1065 /* create a new vnn structure for this ip address */
1066 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1067 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1068 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
/* second pass: tokenise a talloc copy and store each name on the VNN */
1069 tmp = talloc_strdup(vnn, ifaces);
1070 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1071 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1072 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1073 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1074 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1075 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
/* terminate the name array */
1079 vnn->ifaces[num] = NULL;
1080 vnn->public_address = *addr;
1081 vnn->public_netmask_bits = mask;
1083 if (check_address) {
1084 if (ctdb_sys_have_ip(addr)) {
1085 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1086 vnn->pnn = ctdb->pnn;
1090 for (i=0; vnn->ifaces[i]; i++) {
1091 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1093 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1094 "for public_address[%s]\n",
1095 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1101 DLIST_ADD(ctdb->vnn, vnn);
1107 setup the public address lists from a file
/*
 * Load ctdb->public_addresses_file and add each entry through
 * ctdb_add_public_address(). Trailing empty lines are trimmed from the
 * line count, and leading spaces and tabs on each line are skipped.
 * Each line is split on space/tab into an address token and an optional
 * interface list. If the list is absent, ctdb->default_public_interface
 * is used, and a missing default is reported as a critical error.
 * Lines that fail parse_ip_mask() or cannot be added are reported with
 * their 1-based line number.
 */
1109 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1115 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1116 if (lines == NULL) {
1117 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1120 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1124 for (i=0;i<nlines;i++) {
1126 ctdb_sock_addr addr;
1127 const char *addrstr;
1132 while ((*line == ' ') || (*line == '\t')) {
1138 if (strcmp(line, "") == 0) {
/* first token: the address; second token: the interface list */
1141 tok = strtok(line, " \t");
1143 tok = strtok(NULL, " \t");
1145 if (NULL == ctdb->default_public_interface) {
1146 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1151 ifaces = ctdb->default_public_interface;
1156 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1157 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1161 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1162 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
 * Collect the public IP lists of every node in 'nodemap' that is not
 * flagged NODE_FLAGS_INACTIVE. The result is an array allocated on
 * 'mem_ctx' and indexed by the node's position in the nodemap. Each entry
 * gets its own copy of the addresses that node reported, and the
 * per-node reply is freed after copying. If a node cannot be queried or
 * an allocation fails, the partially built array is freed.
 */
1173 static struct ctdb_public_ip_list *
1174 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1175 TALLOC_CTX *mem_ctx,
1176 struct ctdb_node_map_old *nodemap,
1177 uint32_t public_ip_flags)
1180 struct ctdb_public_ip_list_old *ip_list;
1181 struct ctdb_public_ip_list *public_ips;
1183 public_ips = talloc_zero_array(mem_ctx,
1184 struct ctdb_public_ip_list,
1186 if (public_ips == NULL) {
1187 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1191 for (j = 0; j < nodemap->num; j++) {
1192 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1196 /* Retrieve the list of public IPs from the
1197 * node. Flags says whether it is known or
1199 ret = ctdb_ctrl_get_public_ips_flags(
1200 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1201 public_ip_flags, &ip_list);
1204 ("Failed to read public IPs from node: %u\n", j));
1205 talloc_free(public_ips);
1208 public_ips[j].num = ip_list->num;
1209 if (ip_list->num == 0) {
1210 talloc_free(ip_list);
1213 public_ips[j].ip = talloc_zero_array(public_ips,
1214 struct ctdb_public_ip,
1216 if (public_ips[j].ip == NULL) {
1217 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1218 talloc_free(public_ips);
1221 memcpy(public_ips[j].ip, &ip_list->ips[0],
1222 sizeof(struct ctdb_public_ip) * ip_list->num);
1223 talloc_free(ip_list);
/* Shared state for the GET_TUNABLE callbacks. Besides the tunable name,
 * get_tunable_from_nodes() also fills in an 'out' array and a 'fatal'
 * flag. */
1229 struct get_tunable_callback_data {
1230 const char *tunable;
/*
 * Success callback for CTDB_CONTROL_GET_TUNABLE. Checks that the reply is
 * exactly sizeof(uint32_t) bytes and that 'pnn' fits within the output
 * array (sized via talloc_array_length()), then stores the value at
 * cd->out[pnn]. Both mismatches are logged.
 */
1235 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1236 int32_t res, TDB_DATA outdata,
1239 struct get_tunable_callback_data *cd =
1240 (struct get_tunable_callback_data *)callback;
1244 /* Already handled in fail callback */
1248 if (outdata.dsize != sizeof(uint32_t)) {
1249 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1250 cd->tunable, pnn, (int)sizeof(uint32_t),
1251 (int)outdata.dsize));
1256 size = talloc_array_length(cd->out);
1258 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1259 cd->tunable, pnn, size));
1264 cd->out[pnn] = *(uint32_t *)outdata.dptr;
/*
 * Failure callback for CTDB_CONTROL_GET_TUNABLE. Logs one of three
 * messages naming the tunable and the node: a timeout, the tunable not
 * being implemented on that node (at warning level), or an unexpected
 * error.
 */
1267 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1268 int32_t res, TDB_DATA outdata,
1271 struct get_tunable_callback_data *cd =
1272 (struct get_tunable_callback_data *)callback;
1277 ("Timed out getting tunable \"%s\" from node %d\n",
1283 DEBUG(DEBUG_WARNING,
1284 ("Tunable \"%s\" not implemented on node %d\n",
1289 ("Unexpected error getting tunable \"%s\" from node %d\n",
/*
 * Ask the connected nodes for the value of 'tunable'. An array of
 * nodemap->num values is allocated on 'tmp_ctx' and pre-filled with
 * 'default_value'; get_tunable_callback() then overwrites the slot of each
 * node that answers. The request buffer is freed before returning.
 * Callers in this file treat a NULL return as failure.
 *
 * NOTE(review): the talloc_size() result in data.dptr is dereferenced
 * through 't' with no NULL check visible in between.
 */
1295 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1296 TALLOC_CTX *tmp_ctx,
1297 struct ctdb_node_map_old *nodemap,
1298 const char *tunable,
1299 uint32_t default_value)
1302 struct ctdb_control_get_tunable *t;
1305 struct get_tunable_callback_data callback_data;
1308 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1309 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1310 for (i=0; i<nodemap->num; i++) {
1311 tvals[i] = default_value;
1314 callback_data.out = tvals;
1315 callback_data.tunable = tunable;
1316 callback_data.fatal = false;
/* request = fixed header up to 'name' followed by the NUL-terminated name */
1318 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1319 data.dptr = talloc_size(tmp_ctx, data.dsize);
1320 t = (struct ctdb_control_get_tunable *)data.dptr;
1321 t->length = strlen(tunable)+1;
1322 memcpy(t->name, tunable, t->length);
1323 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1324 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1325 nodes, 0, TAKEOVER_TIMEOUT(),
1327 get_tunable_callback,
1328 get_tunable_fail_callback,
1329 &callback_data) != 0) {
1330 if (callback_data.fatal) {
1336 talloc_free(data.dptr);
/*
 * Build a struct ctdb_node_map on 'mem_ctx' from an old-format node map by
 * copying the node count and the node array.
 *
 * NOTE(review): new->node is passed to memcpy() with no NULL check
 * visible after talloc_zero_array().
 * NOTE(review): the memcpy() assumes the elements of old->nodes[] have the
 * same layout as struct ctdb_node_and_flags -- confirm.
 */
1341 static struct ctdb_node_map *
1342 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1343 const struct ctdb_node_map_old *old)
1345 struct ctdb_node_map *new;
1347 new = talloc(mem_ctx, struct ctdb_node_map);
1349 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1352 new->num = old->num;
/* the node array is a talloc child of the new map */
1353 new->node = talloc_zero_array(new,
1354 struct ctdb_node_and_flags, new->num);
1355 memcpy(new->node, &old->nodes[0],
1356 sizeof(struct ctdb_node_and_flags) * new->num);
/*
 * Prepare the node flags used for IP allocation. Fetches two per-node
 * tunable arrays with get_tunable_from_nodes() (the second one is
 * "NoIPHostOnAllDisabled", default 0), converts the node map with
 * ctdb_node_map_old_to_new(), and hands everything to
 * ipalloc_set_node_flags(). The tunable arrays are freed afterwards.
 * All temporaries are allocated on 'ipalloc_state'.
 */
1362 static bool set_ipflags(struct ctdb_context *ctdb,
1363 struct ipalloc_state *ipalloc_state,
1364 struct ctdb_node_map_old *nodemap)
1366 uint32_t *tval_noiptakeover;
1367 uint32_t *tval_noiphostonalldisabled;
1368 struct ctdb_node_map *new;
1370 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1372 if (tval_noiptakeover == NULL) {
1376 tval_noiphostonalldisabled =
1377 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1378 "NoIPHostOnAllDisabled", 0);
1379 if (tval_noiphostonalldisabled == NULL) {
1380 /* Caller frees tmp_ctx */
1384 new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1389 ipalloc_set_node_flags(ipalloc_state, new,
1391 tval_noiphostonalldisabled);
1393 talloc_free(tval_noiptakeover);
1394 talloc_free(tval_noiphostonalldisabled);
/*
 * Pick the IP allocation algorithm from the tunables: LCP2 when
 * lcp2_public_ip_assignment is 1, otherwise deterministic when
 * deterministic_public_ips is 1, otherwise non-deterministic.
 */
1400 static enum ipalloc_algorithm
1401 determine_algorithm(const struct ctdb_tunable_list *tunables)
1403 if (1 == tunables->lcp2_public_ip_assignment) {
1404 return IPALLOC_LCP2;
1405 } else if (1 == tunables->deterministic_public_ips) {
1406 return IPALLOC_DETERMINISTIC;
1408 return IPALLOC_NONDETERMINISTIC;
/* Failure bookkeeping for a takeover run. fail_count is indexed by node
 * PNN and has num_nodes entries. */
1412 struct takeover_callback_data {
1414 unsigned int *fail_count;
/*
 * Allocate a zeroed takeover_callback_data on 'mem_ctx' together with a
 * zeroed fail_count array of num_nodes entries (a talloc child of the
 * structure), and record num_nodes in it. Allocation failures are logged;
 * a failed fail_count allocation also frees the structure.
 *
 * NOTE(review): the local pointer is declared 'static' although it is
 * reassigned on every call and returned by value. The storage class looks
 * unintended and makes the function non-reentrant -- confirm and drop it.
 */
1417 static struct takeover_callback_data *
1418 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1421 static struct takeover_callback_data *takeover_data;
1423 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1424 if (takeover_data == NULL) {
1425 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1429 takeover_data->fail_count = talloc_zero_array(takeover_data,
1430 unsigned int, num_nodes);
1431 if (takeover_data->fail_count == NULL) {
1432 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1433 talloc_free(takeover_data);
1437 takeover_data->num_nodes = num_nodes;
1439 return takeover_data;
/*
 * Failure callback for the takeover-run controls. After checking node_pnn
 * against cd->num_nodes, it increments that node's fail_count. The
 * "failed the takeover run" message is logged only for a node's first
 * failure.
 */
1442 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1443 uint32_t node_pnn, int32_t res,
1444 TDB_DATA outdata, void *callback_data)
1446 struct takeover_callback_data *cd =
1447 talloc_get_type_abort(callback_data,
1448 struct takeover_callback_data);
1450 if (node_pnn >= cd->num_nodes) {
1451 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1455 if (cd->fail_count[node_pnn] == 0) {
1457 ("Node %u failed the takeover run\n", node_pnn));
1460 cd->fail_count[node_pnn]++;
/*
 * Find the node with the highest failure count in 'tcd'. If any node
 * failed at all, that node's PNN is sent as a 4-byte message to
 * CTDB_BROADCAST_CONNECTED so banning credits can be applied. A failure
 * to send is logged. The strict '>' comparison means a later node must
 * exceed the current maximum to replace it.
 */
1463 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1464 struct takeover_callback_data *tcd)
1466 unsigned int max_fails = 0;
1467 uint32_t max_pnn = -1;
1470 for (i = 0; i < tcd->num_nodes; i++) {
1471 if (tcd->fail_count[i] > max_fails) {
1473 max_fails = tcd->fail_count[i];
1477 if (max_fails > 0) {
1482 ("Sending banning credits to %u with fail count %u\n",
1483 max_pnn, max_fails));
/* the message payload is the raw PNN of the worst offender */
1485 data.dptr = (uint8_t *)&max_pnn;
1486 data.dsize = sizeof(uint32_t);
1487 ret = ctdb_client_send_message(ctdb,
1488 CTDB_BROADCAST_CONNECTED,
1493 ("Failed to set banning credits for node %u\n",
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + whether IP rebalancing ("failback") should be done (this uses a
 *     cluster-wide configuration variable and only the value from the
 *     master node is used); and
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map and tunables
 *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
 *   (tunable fetching done separately so values can be faked in unit
 *   testing)
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then early exit
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKEOVER_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
 */
/*
 * Run one IP takeover pass: work out which node should host each
 * public IP and push the result to the cluster.
 *
 * Visible stages: set up per-node failure accounting and a cumulative
 * timeout of 3 * TakeoverTimeout; unless disable_ip_failover is set,
 * build the ipalloc state, set IP flags, fetch the known and available
 * public IPs and run ipalloc(); send CTDB_CONTROL_RELEASE_IP to every
 * connected node for the IPs it should not host, then
 * CTDB_CONTROL_TAKEOVER_IP to the node assigned each IP, and finally
 * CTDB_CONTROL_IPREALLOCATED to the connected nodes.  Failed controls
 * are counted per node by takeover_run_fail_callback() and the counts
 * are handed to takeover_run_process_failures().
 *
 * ctdb:                  daemon context (tunables, node count)
 * nodemap:               node map; DISCONNECTED nodes get no RELEASE_IP
 * force_rebalance_nodes: passed through to ipalloc_state_init()
 *
 * Returns an int status.  NOTE(review): the return statements and jump
 * targets are not visible in this view - confirm the exact values and
 * exit paths against the full source.
 */
1525 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1526 uint32_t *force_rebalance_nodes)
1529 struct ctdb_public_ip ip;
1531 struct public_ip_list *all_ips, *tmp_ip;
1533 struct timeval timeout;
1534 struct client_async_data *async_data;
1535 struct ctdb_client_control_state *state;
/* Scratch talloc context; every allocation below hangs off it and the
 * visible exit paths free it */
1536 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1537 struct ipalloc_state *ipalloc_state;
1538 struct ctdb_public_ip_list *known_ips, *available_ips;
1539 struct takeover_callback_data *takeover_data;
1541 /* Initialise fail callback data to be used with
1542 * takeover_run_fail_callback(). A failure in any of the
1543 * following steps will cause an early return, so this can be
1544 * reused for each of those steps without re-initialising. */
1545 takeover_data = takeover_callback_data_init(tmp_ctx,
1547 if (takeover_data == NULL) {
1548 talloc_free(tmp_ctx);
1552 /* Each of the later stages (RELEASE_IP, TAKEOVER_IP,
1553 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1554 * seconds. However, RELEASE_IP can take longer due to TCP
1555 * connection killing, so sometimes needs more time.
1556 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1557 * seconds across all 3 stages. No explicit expiry checks are
1558 * needed before each stage because tevent is smart enough to
1559 * fire the timeouts even if they are in the past. Initialise
1560 * this here to cope with early jumps to IPREALLOCATED. */
1561 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0);
1564 * ip failover is completely disabled, just send out the
1565 * ipreallocated event.
1567 if (ctdb->tunable.disable_ip_failover != 0) {
/* Allocation state: algorithm from the tunables, failback disabled
 * when no_ip_failback is non-zero, plus the caller's rebalance list */
1571 ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1572 determine_algorithm(&ctdb->tunable),
1573 (ctdb->tunable.no_ip_failback != 0),
1574 force_rebalance_nodes);
1575 if (ipalloc_state == NULL) {
1576 talloc_free(tmp_ctx);
1580 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1582 ("Failed to set IP flags - aborting takeover run\n"));
1583 talloc_free(tmp_ctx);
1587 /* Fetch known/available public IPs from each active node */
1588 /* Fetch lists of known public IPs from all nodes */
1589 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1591 if (known_ips == NULL) {
1592 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1593 talloc_free(tmp_ctx);
/* Second fetch is restricted to IPs each node can currently host */
1596 available_ips = ctdb_fetch_remote_public_ips(
1597 ctdb, ipalloc_state, nodemap,
1598 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1599 if (available_ips == NULL) {
1600 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1601 talloc_free(tmp_ctx);
1605 if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1606 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1607 talloc_free(tmp_ctx);
/* Not an error: only a warning is logged when no node can host IPs */
1611 if (! ipalloc_can_host_ips(ipalloc_state)) {
1612 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1616 /* Do the IP reassignment calculations */
1617 all_ips = ipalloc(ipalloc_state);
1618 if (all_ips == NULL) {
1619 talloc_free(tmp_ctx);
1623 /* Now tell all nodes to release any public IPs should not
1624 * host. This will be a NOOP on nodes that don't currently
1625 * hold the given IP.
1627 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1628 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1630 async_data->fail_callback = takeover_run_fail_callback;
1631 async_data->callback_data = takeover_data;
1633 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1635 /* Send a RELEASE_IP to all nodes that should not be hosting
1636 * each IP. For each IP, all but one of these will be
1637 * redundant. However, the redundant ones are used to tell
1638 * nodes which node should be hosting the IP so that commands
1639 * like "ctdb ip" can display a particular nodes idea of who
1640 * is hosting what. */
1641 for (i=0;i<nodemap->num;i++) {
1642 /* don't talk to unconnected nodes, but do talk to banned nodes */
1643 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1647 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1648 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1649 /* This node should be serving this
1650 vnn so don't tell it to release the ip
1654 ip.pnn = tmp_ip->pnn;
1655 ip.addr = tmp_ip->addr;
1657 data.dsize = sizeof(ip);
1658 data.dptr = (uint8_t *)&ip;
1659 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1660 0, CTDB_CONTROL_RELEASE_IP, 0,
1663 if (state == NULL) {
1664 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1665 talloc_free(tmp_ctx);
1669 ctdb_client_async_add(async_data, state);
1672 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1674 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1677 talloc_free(async_data);
1680 /* For each IP, send a TAKOVER_IP to the node that should be
1681 * hosting it. Many of these will often be redundant (since
1682 * the allocation won't have changed) but they can be useful
1683 * to recover from inconsistencies. */
1684 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1685 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1687 async_data->fail_callback = takeover_run_fail_callback;
1688 async_data->callback_data = takeover_data;
1690 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1691 if (tmp_ip->pnn == -1) {
1692 /* this IP won't be taken over */
1696 ip.pnn = tmp_ip->pnn;
1697 ip.addr = tmp_ip->addr;
1699 data.dsize = sizeof(ip);
1700 data.dptr = (uint8_t *)&ip;
/* The cumulative deadline computed at the top is passed to the control */
1701 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1702 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1703 data, async_data, &timeout, NULL);
1704 if (state == NULL) {
1705 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1706 talloc_free(tmp_ctx);
1710 ctdb_client_async_add(async_data, state);
1712 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1714 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1720 * Tell all nodes to run eventscripts to process the
1721 * "ipreallocated" event. This can do a lot of things,
1722 * including restarting services to reconfigure them if public
1723 * IPs have moved. Once upon a time this event only used to
/* Final stage goes to the connected nodes only */
1726 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1727 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1730 NULL, takeover_run_fail_callback,
1734 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1738 talloc_free(tmp_ctx);
/* Turn the per-node failure counts into a penalty for the worst node */
1742 takeover_run_process_failures(ctdb, takeover_data);
1743 talloc_free(tmp_ctx);
1749 destroy a ctdb_client_ip structure
1751 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1753 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1754 ctdb_addr_to_str(&ip->addr),
1755 ntohs(ip->addr.ip.sin_port),
1758 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
/*
  called by a client to inform us of a TCP connection that it is managing
  and that should be tickled with an ACK when IP takeover is done
 */
/*
 * Control handler: register a client-owned TCP connection for tickling.
 *
 * ctdb:      daemon context
 * client_id: reqid of the registering client, looked up in ctdb->idr
 * indata:    payload interpreted as a struct ctdb_connection (src/dst)
 *
 * When the destination is a public address held by this node, the
 * connection is recorded on the client (a ctdb_client_ip entry on
 * ctdb->client_ip_list and a ctdb_tcp_list entry on client->tcp_list)
 * and CTDB_CONTROL_TCP_ADD is broadcast to the connected nodes.
 * Registering for an IP this node does not hold is treated as a
 * failure of the call.
 *
 * NOTE(review): no check of indata.dsize and no NULL check of the
 * reqid_find() result is visible in this function - confirm that the
 * caller guarantees both.
 */
1766 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1769 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1770 struct ctdb_connection *tcp_sock = NULL;
1771 struct ctdb_tcp_list *tcp;
1772 struct ctdb_connection t;
1775 struct ctdb_client_ip *ip;
1776 struct ctdb_vnn *vnn;
1777 ctdb_sock_addr addr;
1779 /* If we don't have public IPs, tickles are useless */
1780 if (ctdb->vnn == NULL) {
1784 tcp_sock = (struct ctdb_connection *)indata.dptr;
/* Each endpoint is copied to addr and then passed through
 * ctdb_canonicalize_ip() with the original field as the second
 * argument - presumably rewriting it in place; confirm against the
 * helper's definition */
1786 addr = tcp_sock->src;
1787 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1788 addr = tcp_sock->dst;
1789 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
/* Look the destination up among our public addresses */
1792 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1793 vnn = find_public_ip_vnn(ctdb, &addr);
/* Diagnostics for a non-public destination; IPv4 loopback is not
 * reported */
1795 switch (addr.sa.sa_family) {
1797 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1798 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1799 ctdb_addr_to_str(&addr)));
1803 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1804 ctdb_addr_to_str(&addr)));
1807 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1813 if (vnn->pnn != ctdb->pnn) {
1814 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1815 ctdb_addr_to_str(&addr),
1816 client_id, client->pid));
1817 /* failing this call will tell smbd to die */
/* Both records are talloc children of the client, so they go away
 * with it; the destructor unlinks the ip entry from the global list */
1821 ip = talloc(client, struct ctdb_client_ip);
1822 CTDB_NO_MEMORY(ctdb, ip);
1826 ip->client_id = client_id;
1827 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1828 DLIST_ADD(ctdb->client_ip_list, ip);
1830 tcp = talloc(client, struct ctdb_tcp_list);
1831 CTDB_NO_MEMORY(ctdb, tcp);
1833 tcp->connection.src = tcp_sock->src;
1834 tcp->connection.dst = tcp_sock->dst;
1836 DLIST_ADD(client->tcp_list, tcp);
/* Stack copy of the connection used as the broadcast payload */
1838 t.src = tcp_sock->src;
1839 t.dst = tcp_sock->dst;
1841 data.dptr = (uint8_t *)&t;
1842 data.dsize = sizeof(t);
/* The log line prints: destination port, source address, source port */
1844 switch (addr.sa.sa_family) {
1846 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1847 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1848 ctdb_addr_to_str(&tcp_sock->src),
1849 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1852 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1853 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1854 ctdb_addr_to_str(&tcp_sock->src),
1855 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1858 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1862 /* tell all nodes about this tcp connection */
1863 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1864 CTDB_CONTROL_TCP_ADD,
1865 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1867 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1875 find a tcp address on a list
1877 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1878 struct ctdb_connection *tcp)
1882 if (array == NULL) {
1886 for (i=0;i<array->num;i++) {
1887 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1888 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1889 return &array->connections[i];
/*
  called by a daemon to inform us of a TCP connection that one of its
  clients is managing and that should be tickled with an ACK when IP
  takeover is done
 */
/*
 * Control handler: add one TCP connection to the tickle array of the
 * public address it is connected to.
 *
 * ctdb:              daemon context
 * indata:            payload interpreted as a struct ctdb_connection
 * tcp_update_needed: when true, the vnn is flagged so that its tickle
 *                    list is re-sent to the other nodes
 *
 * The array is created on the first tickle for an address; an already
 * known connection is only logged; otherwise the array is grown with
 * talloc_realloc() and the connection appended.
 *
 * NOTE(review): the talloc_realloc() result is assigned straight to
 * tcparray->connections, so on allocation failure the only pointer to
 * the existing array is overwritten with NULL - consider a temporary.
 */
1902 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1904 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1905 struct ctdb_tcp_array *tcparray;
1906 struct ctdb_connection tcp;
1907 struct ctdb_vnn *vnn;
1909 /* If we don't have public IPs, tickles are useless */
1910 if (ctdb->vnn == NULL) {
/* The destination address selects the vnn that owns the tickle */
1914 vnn = find_public_ip_vnn(ctdb, &p->dst);
1916 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1917 ctdb_addr_to_str(&p->dst)));
1923 tcparray = vnn->tcp_array;
1925 /* If this is the first tickle */
1926 if (tcparray == NULL) {
/* The array is a talloc child of the vnn; storage for a single
 * connection is allocated under the array */
1927 tcparray = talloc(vnn, struct ctdb_tcp_array);
1928 CTDB_NO_MEMORY(ctdb, tcparray);
1929 vnn->tcp_array = tcparray;
1932 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1933 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1935 tcparray->connections[tcparray->num].src = p->src;
1936 tcparray->connections[tcparray->num].dst = p->dst;
1939 if (tcp_update_needed) {
1940 vnn->tcp_update_needed = true;
1946 /* Do we already have this tickle ?*/
1949 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1950 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1951 ctdb_addr_to_str(&tcp.dst),
1952 ntohs(tcp.dst.ip.sin_port),
1957 /* A new tickle, we must add it to the array */
1958 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1959 struct ctdb_connection,
1961 CTDB_NO_MEMORY(ctdb, tcparray->connections);
/* The new entry goes into the slot at index num */
1963 tcparray->connections[tcparray->num].src = p->src;
1964 tcparray->connections[tcparray->num].dst = p->dst;
1967 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1968 ctdb_addr_to_str(&tcp.dst),
1969 ntohs(tcp.dst.ip.sin_port),
1972 if (tcp_update_needed) {
1973 vnn->tcp_update_needed = true;
/*
 * Remove one TCP connection from the tickle array of a public address.
 *
 * vnn:  public address whose tcp_array is searched
 * conn: connection to remove, matched with ctdb_tcp_find()
 *
 * A missing array or an unknown connection is only logged.  Otherwise
 * the last element of the array is copied over the entry being removed
 * and num is decremented, so the order of the remaining entries is not
 * preserved.  When num reaches 0 the whole array is freed and
 * vnn->tcp_array reset to NULL.  A removal sets
 * vnn->tcp_update_needed so the change is propagated to other nodes.
 *
 * The final log line prints the connection's source address and port,
 * whereas the "doesnt exist" messages print the destination.
 */
1980 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1982 struct ctdb_connection *tcpp;
1988 /* if the array is empty we cant remove it
1989 and we don't need to do anything
1991 if (vnn->tcp_array == NULL) {
1992 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1993 ctdb_addr_to_str(&conn->dst),
1994 ntohs(conn->dst.ip.sin_port)));
1999 /* See if we know this connection
2000 if we don't know this connection then we dont need to do anything
2002 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2004 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2005 ctdb_addr_to_str(&conn->dst),
2006 ntohs(conn->dst.ip.sin_port)));
2011 /* We need to remove this entry from the array.
2012 Instead of allocating a new array and copying data to it
2013 we cheat and just copy the last entry in the existing array
2014 to the entry that is to be removed and just shring the
2017 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2018 vnn->tcp_array->num--;
2020 /* If we deleted the last entry we also need to remove the entire array
2022 if (vnn->tcp_array->num == 0) {
2023 talloc_free(vnn->tcp_array);
2024 vnn->tcp_array = NULL;
2027 vnn->tcp_update_needed = true;
2029 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2030 ctdb_addr_to_str(&conn->src),
2031 ntohs(conn->src.ip.sin_port)));
/*
  called by a daemon to inform us that a TCP connection used by one of
  its clients is no longer needed in the tickle database
 */
/*
 * Control handler: drop a TCP connection from the tickle database.
 *
 * ctdb:   daemon context
 * indata: payload interpreted as a struct ctdb_connection
 *
 * The connection's destination address selects the public address
 * (vnn); the actual removal is done by ctdb_remove_connection().  An
 * unknown destination address is logged.
 */
2039 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2041 struct ctdb_vnn *vnn;
2042 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2044 /* If we don't have public IPs, tickles are useless */
2045 if (ctdb->vnn == NULL) {
2049 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2052 (__location__ " unable to find public address %s\n",
2053 ctdb_addr_to_str(&conn->dst)));
2057 ctdb_remove_connection(vnn, conn);
2064 Called when another daemon starts - causes all tickles for all
2065 public addresses we are serving to be sent to the new node on the
2066 next check. This actually causes the next scheduled call to
2067 tdb_update_tcp_tickles() to update all nodes. This is simple and
2068 doesn't require careful error handling.
2070 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2072 struct ctdb_vnn *vnn;
2074 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2075 (unsigned long) pnn));
2077 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2078 vnn->tcp_update_needed = true;
/*
  called when a client structure goes away - hook to remove
  elements from the tcp_list in all daemons
 */
/*
 * Clean up the tickle bookkeeping of a client that is going away.
 *
 * Each entry is unlinked from client->tcp_list in turn.  Its connection
 * is removed from the tickle array only when the public address it was
 * connected to is currently hosted by this node; an address that is not
 * a public address is logged.
 */
2089 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2091 while (client->tcp_list) {
2092 struct ctdb_vnn *vnn;
2093 struct ctdb_tcp_list *tcp = client->tcp_list;
2094 struct ctdb_connection *conn = &tcp->connection;
/* Unlinking the head entry is what makes the while loop advance */
2096 DLIST_REMOVE(client->tcp_list, tcp);
2098 vnn = find_public_ip_vnn(client->ctdb,
2102 (__location__ " unable to find public address %s\n",
2103 ctdb_addr_to_str(&conn->dst)));
2107 /* If the IP address is hosted on this node then
2108 * remove the connection. */
2109 if (vnn->pnn == client->ctdb->pnn) {
2110 ctdb_remove_connection(vnn, conn);
2113 /* Otherwise this function has been called because the
2114 * server IP address has been released to another node
2115 * and the client has exited. This means that we
2116 * should not delete the connection information. The
2117 * takeover node processes connections too. */
/*
 * Release every public IP address currently configured on this node.
 *
 * Nothing is attempted when the disable_ip_failover tunable is 1.  For
 * each public address: if the system does not have the IP, only the
 * interface assignment is dropped; an address with an update already in
 * flight is skipped with a warning; otherwise the "releaseip" event is
 * run through ctdb_event_script_args() with interface, address and
 * netmask bits as arguments.  Success is judged by checking afterwards
 * whether the system still has the address, not by the script result.
 * After a successful release release_ip_post() is called; its return
 * value replaces vnn.  The number of released addresses is logged at
 * the end.
 *
 * The list is walked with a saved next pointer because the current vnn
 * may be freed during the iteration.
 */
2122 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2124 struct ctdb_vnn *vnn, *next;
2127 if (ctdb->tunable.disable_ip_failover == 1) {
2131 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
2132 /* vnn can be freed below in release_ip_post() */
/* Address not configured locally: just forget the interface binding */
2135 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2136 ctdb_vnn_unassign_iface(ctdb, vnn);
2140 /* Don't allow multiple releases at once. Some code,
2141 * particularly ctdb_tickle_sentenced_connections() is
2143 if (vnn->update_in_flight) {
2144 DEBUG(DEBUG_WARNING,
2146 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2147 ctdb_addr_to_str(&vnn->public_address),
2148 vnn->public_netmask_bits,
2149 ctdb_vnn_iface_string(vnn)));
2152 vnn->update_in_flight = true;
2154 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2155 ctdb_addr_to_str(&vnn->public_address),
2156 vnn->public_netmask_bits,
2157 ctdb_vnn_iface_string(vnn)));
2159 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2160 ctdb_vnn_iface_string(vnn),
2161 ctdb_addr_to_str(&vnn->public_address),
2162 vnn->public_netmask_bits);
2163 /* releaseip timeouts are converted to success, so to
2164 * detect failures just check if the IP address is
2167 if (ctdb_sys_have_ip(&vnn->public_address)) {
2170 " IP address %s not released\n",
2171 ctdb_addr_to_str(&vnn->public_address)));
2172 vnn->update_in_flight = false;
2176 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
2178 vnn->update_in_flight = false;
2183 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
/*
  get list of public IPs
 */
/*
 * Control handler: return this node's list of public IP addresses.
 *
 * ctdb:    daemon context
 * c:       request; the CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE flag limits
 *          the reply to addresses for which ctdb_vnn_available() holds
 * outdata: receives a struct ctdb_public_ip_list_old allocated as a
 *          talloc child of outdata
 *
 * The reply buffer is sized from a first pass over ctdb->vnn and zeroed;
 * the reported dsize is then recomputed from the number of entries
 * actually filled in, so a filtered reply is shorter than the buffer.
 */
2190 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2191 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2194 struct ctdb_public_ip_list_old *ips;
2195 struct ctdb_vnn *vnn;
2196 bool only_available = false;
2198 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2199 only_available = true;
2202 /* count how many public ip structures we have */
2204 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
/* Header up to the flexible ips[] member plus one slot per address */
2208 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2209 num*sizeof(struct ctdb_public_ip);
2210 ips = talloc_zero_size(outdata, len);
2211 CTDB_NO_MEMORY(ctdb, ips);
2214 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2215 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2218 ips->ips[i].pnn = vnn->pnn;
2219 ips->ips[i].addr = vnn->public_address;
/* Shrink the reported length to the i entries that were written */
2223 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2224 i*sizeof(struct ctdb_public_ip);
2226 outdata->dsize = len;
2227 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: describe one public IP address and its interfaces.
 *
 * ctdb:    daemon context
 * c:       request header
 * indata:  payload interpreted as the ctdb_sock_addr to look up
 * outdata: receives a struct ctdb_public_ip_info_old allocated as a
 *          talloc child of outdata
 *
 * The reply carries the address, the PNN recorded for it, and one
 * entry (name, link state, reference count) per interface listed in
 * vnn->ifaces, which is walked as a NULL-terminated array.  active_idx
 * is the index of the interface currently assigned to the address, or
 * 0xFFFFFFFF when none of the listed interfaces is.  An address that
 * is not a public address is logged as an error.
 */
2233 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2234 struct ctdb_req_control_old *c,
2239 ctdb_sock_addr *addr;
2240 struct ctdb_public_ip_info_old *info;
2241 struct ctdb_vnn *vnn;
2243 addr = (ctdb_sock_addr *)indata.dptr;
2245 vnn = find_public_ip_vnn(ctdb, addr);
2247 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2248 "'%s'not a public address\n",
2249 ctdb_addr_to_str(addr)));
2253 /* count how many public ip structures we have */
2255 for (;vnn->ifaces[num];) {
2259 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2260 num*sizeof(struct ctdb_iface);
2261 info = talloc_zero_size(outdata, len);
2262 CTDB_NO_MEMORY(ctdb, info);
2264 info->ip.addr = vnn->public_address;
2265 info->ip.pnn = vnn->pnn;
/* Sentinel meaning "no active interface" until one is matched below */
2266 info->active_idx = 0xFFFFFFFF;
2268 for (i=0; vnn->ifaces[i]; i++) {
2269 struct ctdb_interface *cur;
/* Interface names are resolved to the daemon's interface records */
2271 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2273 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2277 if (vnn->iface == cur) {
2278 info->active_idx = i;
/* strncpy() may leave the field unterminated, hence the explicit NUL */
2280 strncpy(info->ifaces[i].name, cur->name,
2281 sizeof(info->ifaces[i].name));
2282 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2283 info->ifaces[i].link_state = cur->link_up;
2284 info->ifaces[i].references = cur->references;
2287 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2288 i*sizeof(struct ctdb_iface);
2290 outdata->dsize = len;
2291 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return the list of interfaces known to this daemon.
 *
 * ctdb:    daemon context; ctdb->ifaces is the list that is reported
 * c:       request header
 * outdata: receives a struct ctdb_iface_list_old allocated as a talloc
 *          child of outdata
 *
 * Every entry carries the interface name (always NUL-terminated), its
 * link state and its reference count.
 */
2296 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2297 struct ctdb_req_control_old *c,
2301 struct ctdb_iface_list_old *ifaces;
2302 struct ctdb_interface *cur;
/* NOTE: despite the wording of the next comment, this loop walks the
 * interface list */
2304 /* count how many public ip structures we have */
2306 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2310 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2311 num*sizeof(struct ctdb_iface);
2312 ifaces = talloc_zero_size(outdata, len);
2313 CTDB_NO_MEMORY(ctdb, ifaces);
2316 for (cur=ctdb->ifaces;cur;cur=cur->next) {
/* strncpy() may leave the field unterminated, hence the explicit NUL */
2317 strncpy(ifaces->ifaces[i].name, cur->name,
2318 sizeof(ifaces->ifaces[i].name));
2319 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2320 ifaces->ifaces[i].link_state = cur->link_up;
2321 ifaces->ifaces[i].references = cur->references;
2325 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2326 i*sizeof(struct ctdb_iface);
2328 outdata->dsize = len;
2329 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the recorded link state of one interface.
 *
 * ctdb:   daemon context
 * c:      request header
 * indata: payload interpreted as a struct ctdb_iface
 *
 * The request is validated first: the name must have a NUL at index
 * CTDB_IFACE_SIZE, link_state must be one of the values accepted by
 * the switch, and references must be 0.  The interface is then looked
 * up by name; when the requested state differs from the recorded one
 * the change is logged and stored in iface->link_up.
 */
2334 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2335 struct ctdb_req_control_old *c,
2338 struct ctdb_iface *info;
2339 struct ctdb_interface *iface;
2340 bool link_up = false;
2342 info = (struct ctdb_iface *)indata.dptr;
/* NOTE(review): this reads name[CTDB_IFACE_SIZE], so the name field
 * must be larger than CTDB_IFACE_SIZE bytes - confirm against the
 * struct definition */
2344 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2345 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2346 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2347 len, len, info->name));
2351 switch (info->link_state) {
2359 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2360 (unsigned int)info->link_state));
2364 if (info->references != 0) {
2365 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2366 (unsigned int)info->references));
2370 iface = ctdb_find_iface(ctdb, info->name);
2371 if (iface == NULL) {
/* Nothing to log or store when the state is unchanged */
2375 if (link_up == iface->link_up) {
2380 ("iface[%s] has changed it's link status %s => %s\n",
2382 iface->link_up?"up":"down",
2383 link_up?"up":"down"));
2385 iface->link_up = link_up;
/*
  called by a daemon to inform us of the entire list of TCP tickles for
  a particular public address.
  this control should only be sent by the node that is currently serving
  that public address.
 */
/*
 * Control handler: replace the stored tickle list of a public address.
 *
 * ctdb:   daemon context
 * indata: payload interpreted as a struct ctdb_tickle_list_old (address,
 *         connection count, connections)
 *
 * The payload must be large enough for the fixed header and for
 * list->num connections.  The update is ignored when the address is
 * not a public address or when this node itself hosts the address.
 * Otherwise any existing array is freed and a fresh copy of the
 * received connections is installed on the vnn.
 */
2396 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2398 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2399 struct ctdb_tcp_array *tcparray;
2400 struct ctdb_vnn *vnn;
2402 /* We must at least have tickles.num or else we cant verify the size
2403 of the received data blob
2405 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2406 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2410 /* verify that the size of data matches what we expect */
2411 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2412 + sizeof(struct ctdb_connection) * list->num) {
2413 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2417 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2418 ctdb_addr_to_str(&list->addr)));
2420 vnn = find_public_ip_vnn(ctdb, &list->addr);
2422 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2423 ctdb_addr_to_str(&list->addr)));
/* The hosting node owns the authoritative list, so a copy sent to it
 * is redundant */
2428 if (vnn->pnn == ctdb->pnn) {
2430 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2431 ctdb_addr_to_str(&list->addr)));
2435 /* remove any old ticklelist we might have */
2436 talloc_free(vnn->tcp_array);
2437 vnn->tcp_array = NULL;
/* The replacement is attached to the vnn only after the copy below, so
 * vnn->tcp_array stays NULL if an allocation check bails out */
2439 tcparray = talloc(vnn, struct ctdb_tcp_array);
2440 CTDB_NO_MEMORY(ctdb, tcparray);
2442 tcparray->num = list->num;
2444 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2445 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2447 memcpy(tcparray->connections, &list->connections[0],
2448 sizeof(struct ctdb_connection)*tcparray->num);
2450 /* We now have a new fresh tickle list array for this vnn */
2451 vnn->tcp_array = tcparray;
/*
  called to return the full list of tickles for the public address associated
  with the provided vnn
 */
/*
 * Control handler: return the tickle list stored for a public address.
 *
 * ctdb:    daemon context
 * indata:  payload interpreted as a ctdb_sock_addr; its port, read with
 *          ctdb_addr_to_port(), acts as a filter
 * outdata: receives a struct ctdb_tickle_list_old allocated as a talloc
 *          child of outdata, sized for the selected connections
 *
 * Connections are selected either wholesale or by comparing the
 * requested port with each connection's destination port.
 * NOTE(review): presumably port 0 selects all connections - the
 * condition itself is not visible in this view; confirm.
 */
2460 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2462 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2463 struct ctdb_tickle_list_old *list;
2464 struct ctdb_tcp_array *tcparray;
2466 struct ctdb_vnn *vnn;
2469 vnn = find_public_ip_vnn(ctdb, addr);
2471 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2472 ctdb_addr_to_str(addr)));
2477 port = ctdb_addr_to_port(addr);
2479 tcparray = vnn->tcp_array;
/* First pass: work out how many connections will be returned */
2481 if (tcparray != NULL) {
2483 /* All connections */
2484 num = tcparray->num;
2486 /* Count connections for port */
2487 for (i = 0; i < tcparray->num; i++) {
2488 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2495 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2496 + sizeof(struct ctdb_connection) * num;
2498 outdata->dptr = talloc_size(outdata, outdata->dsize);
2499 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2500 list = (struct ctdb_tickle_list_old *)outdata->dptr;
/* Second pass: copy the selected connections into the reply */
2510 for (i = 0; i < tcparray->num; i++) {
2512 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2513 list->connections[num] = tcparray->connections[i];
/*
  set the list of all tcp tickles for a public address
 */
/*
 * Broadcast the complete tickle list of one public address.
 *
 * ctdb:     daemon context
 * addr:     public address the list belongs to
 * tcparray: connections to send
 *
 * A struct ctdb_tickle_list_old is built in a temporary buffer
 * allocated on ctdb, sent to all nodes (CTDB_BROADCAST_ALL) as
 * CTDB_CONTROL_SET_TCP_TICKLE_LIST without requesting a reply, and the
 * buffer is freed again.  A send failure is logged.
 */
2525 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2526 ctdb_sock_addr *addr,
2527 struct ctdb_tcp_array *tcparray)
2531 struct ctdb_tickle_list_old *list;
2534 num = tcparray->num;
/* Fixed header followed by num connection records */
2539 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2540 sizeof(struct ctdb_connection) * num;
2541 data.dptr = talloc_size(ctdb, data.dsize);
2542 CTDB_NO_MEMORY(ctdb, data.dptr);
2544 list = (struct ctdb_tickle_list_old *)data.dptr;
2548 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2551 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2552 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2553 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2555 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
/* The payload buffer is only needed for the duration of the send */
2559 talloc_free(data.dptr);
/*
  perform tickle updates if required
 */
/*
 * Periodic timer handler that pushes changed tickle lists to the
 * cluster.
 *
 * ev, te, t:    tevent timer arguments (not used by the visible code)
 * private_data: the struct ctdb_context
 *
 * For every public address hosted by this node whose tcp_update_needed
 * flag is set, the tickle list is sent with
 * ctdb_send_set_tcp_tickles_for_ip().  The flag is cleared on the path
 * that logs a successful send; a failed send is logged.  The handler
 * re-arms itself to run again after tickle_update_interval seconds.
 */
2568 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2569 struct tevent_timer *te,
2570 struct timeval t, void *private_data)
2572 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2574 struct ctdb_vnn *vnn;
2576 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2577 /* we only send out updates for public addresses that
2580 if (ctdb->pnn != vnn->pnn) {
2583 /* We only send out the updates if we need to */
2584 if (!vnn->tcp_update_needed) {
2587 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2588 &vnn->public_address,
2591 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2592 ctdb_addr_to_str(&vnn->public_address)));
2595 ("Sent tickle update for public address %s\n",
2596 ctdb_addr_to_str(&vnn->public_address)));
2597 vnn->tcp_update_needed = false;
/* Re-arm: the timer is parented to tickle_update_context */
2601 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2602 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2603 ctdb_update_tcp_tickles, ctdb);
2607 start periodic update of tcp tickles
2609 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2611 ctdb->tickle_update_context = talloc_new(ctdb);
2613 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2614 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2615 ctdb_update_tcp_tickles, ctdb);
/* State for one sequence of gratuitous ARPs started by
 * ctdb_control_send_gratious_arp() and driven by send_gratious_arp().
 * Those functions also use an interface name (iface) and a send
 * counter (count) stored in this structure. */
2621 struct control_gratious_arp {
/* Daemon context; supplies the event context for the repeat timers */
2622 struct ctdb_context *ctdb;
/* Address that is announced */
2623 ctdb_sock_addr addr;
/*
  send a gratuitous arp
 */
/*
 * Timer handler: send one gratuitous ARP and schedule the next one.
 *
 * ev, te, t:    tevent timer arguments (not used by the visible code)
 * private_data: the struct control_gratious_arp for this sequence
 *
 * The ARP is sent with ctdb_sys_send_arp() for arp->addr on
 * arp->iface; a failure is logged with strerror(errno).  The sequence
 * stops once arp->count equals CTDB_ARP_REPEAT; otherwise the handler
 * is scheduled again CTDB_ARP_INTERVAL seconds later, with the timer
 * parented to the arp state itself.
 */
2631 static void send_gratious_arp(struct tevent_context *ev,
2632 struct tevent_timer *te,
2633 struct timeval t, void *private_data)
2636 struct control_gratious_arp *arp = talloc_get_type(private_data,
2637 struct control_gratious_arp);
2639 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2641 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2642 arp->iface, strerror(errno)));
/* Stop after the configured number of repeats */
2647 if (arp->count == CTDB_ARP_REPEAT) {
2652 tevent_add_timer(arp->ctdb->ev, arp,
2653 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2654 send_gratious_arp, arp);
/*
 * Control handler: start sending gratuitous ARPs for an address.
 *
 * ctdb:   daemon context
 * indata: payload interpreted as a struct ctdb_addr_info_old (address,
 *         interface-name length, interface name)
 *
 * After the payload size has been checked against the fixed header and
 * against header + len, a struct control_gratious_arp is allocated on
 * ctdb, filled with the address and a copy of the interface name, and
 * send_gratious_arp() is scheduled to run immediately
 * (timeval_zero()).
 *
 * NOTE(review): talloc_strdup() stops at the first NUL, so it relies
 * on the interface name being NUL-terminated inside the payload -
 * confirm that senders guarantee this.
 */
2661 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2663 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2664 struct control_gratious_arp *arp;
2666 /* verify the size of indata */
2667 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2668 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2669 (unsigned)indata.dsize,
2670 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2674 ( offsetof(struct ctdb_addr_info_old, iface)
2675 + gratious_arp->len ) ){
2677 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2678 "but should be %u bytes\n",
2679 (unsigned)indata.dsize,
2680 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
/* The state is owned by ctdb; the interface name is a child of the
 * state and is freed together with it */
2685 arp = talloc(ctdb, struct control_gratious_arp);
2686 CTDB_NO_MEMORY(ctdb, arp);
2689 arp->addr = gratious_arp->addr;
2690 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2691 CTDB_NO_MEMORY(ctdb, arp->iface);
2694 tevent_add_timer(arp->ctdb->ev, arp,
2695 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public IP address to this node at run time.
 *
 * ctdb:   daemon context
 * indata: payload interpreted as a struct ctdb_addr_info_old (address,
 *         netmask, interface-name length, interface name)
 *
 * The payload size is checked against the fixed header and against
 * header + len; the address is then logged and registered through
 * ctdb_add_public_address() with a final argument of true.  A failure
 * of that call is logged.
 */
2700 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2702 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2705 /* verify the size of indata */
2706 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2707 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2711 ( offsetof(struct ctdb_addr_info_old, iface)
2714 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2715 "but should be %u bytes\n",
2716 (unsigned)indata.dsize,
2717 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2721 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2723 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2726 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * Control handler: delete a public IP address from this node.
 *
 * ctdb:   daemon context
 * indata: payload interpreted as a struct ctdb_addr_info_old
 *
 * After the same size checks as for adding an address, the public
 * address list is searched with ctdb_same_ip().  An address currently
 * hosted by this node is only marked delete_pending, so the deletion is
 * deferred to the next takeover run; an address that is not hosted
 * here is deleted at once with do_delete_ip().  A request for an
 * address that is not in the list is logged as an error.
 */
2733 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2735 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2736 struct ctdb_vnn *vnn;
2738 /* verify the size of indata */
2739 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2740 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2744 ( offsetof(struct ctdb_addr_info_old, iface)
2747 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2748 "but should be %u bytes\n",
2749 (unsigned)indata.dsize,
2750 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2754 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2756 /* walk over all public addresses until we find a match */
2757 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2758 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2759 if (vnn->pnn == ctdb->pnn) {
2760 /* This IP is currently being hosted.
2761 * Defer the deletion until the next
2762 * takeover run. "ctdb reloadips" will
2763 * always cause a takeover run. "ctdb
2764 * delip" will now need an explicit
2765 * "ctdb ipreallocated" afterwards. */
2766 vnn->delete_pending = true;
2768 /* This IP is not hosted on the
2769 * current node so just delete it
2771 do_delete_ip(ctdb, vnn);
2778 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2779 ctdb_addr_to_str(&pub->addr)));
/*
 * State handed to ctdb_ipreallocated_callback(): c is the control
 * request that is answered once the "ipreallocated" event has finished.
 */
2784 struct ipreallocated_callback_state {
2785 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script.
 *
 * p is the struct ipreallocated_callback_state that was registered
 * together with this callback. A failure message carrying the status is
 * emitted, a status of -ETIME makes the node call ctdb_ban_self(), and
 * the control saved in state->c is answered with the same status.
 */
2788 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2789 int status, void *p)
2791 struct ipreallocated_callback_state *state =
2792 talloc_get_type(p, struct ipreallocated_callback_state);
2796 (" \"ipreallocated\" event script failed (status %d)\n",
2798 if (status == -ETIME) {
2799 ctdb_ban_self(ctdb);
/* the event status is passed through unchanged as the control result */
2803 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
/*
 * Starts the "ipreallocated" event via ctdb_event_script_callback() with
 * ctdb_ipreallocated_callback() as completion handler. The callback
 * state is allocated on ctdb and also passed as the memory context of
 * the event call. The request c is moved onto that state with
 * talloc_steal() and *async_reply is set to true, so the reply is sent
 * from the callback rather than by the caller.
 */
2807 /* A control to run the ipreallocated event */
2808 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2809 struct ctdb_req_control_old *c,
2813 struct ipreallocated_callback_state *state;
2815 state = talloc(ctdb, struct ipreallocated_callback_state);
2816 CTDB_NO_MEMORY(ctdb, state);
2818 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2820 ret = ctdb_event_script_callback(ctdb, state,
2821 ctdb_ipreallocated_callback, state,
2822 CTDB_EVENT_IPREALLOCATED,
2826 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2831 /* tell the control that we will reply asynchronously */
2832 state->c = talloc_steal(state, c);
2833 *async_reply = true;
/*
 * Bookkeeping for one reloadips request handled by
 * ctdb_control_reload_public_ips(): the daemon context, the control
 * request that is answered when the handle is destroyed, and the fd
 * event watching the read end of the pipe to the child.
 */
2839 struct ctdb_reloadips_handle {
2840 struct ctdb_context *ctdb;
2841 struct ctdb_req_control_old *c;
2845 struct tevent_fd *fde;
/*
 * talloc destructor for a reloadips handle (installed with
 * talloc_set_destructor() in ctdb_control_reload_public_ips()).
 *
 * Clears ctdb->reload_ips if it still points at this handle, answers the
 * stored control with h->status and sends SIGKILL to the child process.
 */
2848 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2850 if (h == h->ctdb->reload_ips) {
2851 h->ctdb->reload_ips = NULL;
2854 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2857 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback armed by ctdb_control_reload_public_ips() with a
 * 120 second offset; private_data is the struct ctdb_reloadips_handle
 * of the request the timer belongs to.
 */
2861 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2862 struct tevent_timer *te,
2863 struct timeval t, void *private_data)
2865 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event callback for the read end of the reloadips pipe
 * (registered in ctdb_control_reload_public_ips()); private_data is the
 * struct ctdb_reloadips_handle.
 *
 * Reads the single result byte written by the child. A read of less
 * than one byte or a non-zero byte is logged as a child failure.
 */
2870 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2871 struct tevent_fd *fde,
2872 uint16_t flags, void *private_data)
2874 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2879 ret = sys_read(h->fd[0], &res, 1);
2880 if (ret < 1 || res != 0) {
2881 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Work done by the reloadips child process (called from the child branch
 * of ctdb_control_reload_public_ips()).
 *
 * Steps visible here:
 *  1. fetch the public IPs of the local node with
 *     ctdb_ctrl_get_public_ips(),
 *  2. re-read the public addresses file with ctdb_set_public_addresses(),
 *  3. for every fetched IP that has no match in ctdb->vnn, send a
 *     CTDB_CONTROL_DEL_PUBLIC_IP control to the local node,
 *  4. for every ctdb->vnn entry that has no match among the fetched IPs,
 *     send a CTDB_CONTROL_ADD_PUBLIC_IP control to the local node,
 *  5. wait for all queued controls with ctdb_client_async_wait().
 *
 * All temporary allocations hang off mem_ctx, which is freed before the
 * function finishes.
 */
2889 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2891 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2892 struct ctdb_public_ip_list_old *ips;
2893 struct ctdb_vnn *vnn;
2894 struct client_async_data *async_data;
2895 struct timeval timeout;
2897 struct ctdb_client_control_state *state;
2901 CTDB_NO_MEMORY(ctdb, mem_ctx);
2903 /* Read IPs from local node */
2904 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2905 CTDB_CURRENT_NODE, mem_ctx, &ips);
2908 ("Unable to fetch public IPs from local node\n"));
2909 talloc_free(mem_ctx);
2913 /* Read IPs file - this is safe since this is a child process */
2915 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2916 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2917 talloc_free(mem_ctx);
/* collects every control sent below so they can be waited for together */
2921 async_data = talloc_zero(mem_ctx, struct client_async_data);
2922 CTDB_NO_MEMORY(ctdb, async_data);
2924 /* Compare IPs between node and file for IPs to be deleted */
2925 for (i = 0; i < ips->num; i++) {
2927 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2928 if (ctdb_same_ip(&vnn->public_address,
2929 &ips->ips[i].addr)) {
2930 /* IP is still in file */
2936 /* Delete IP ips->ips[i] */
2937 struct ctdb_addr_info_old *pub;
2940 ("IP %s no longer configured, deleting it\n",
2941 ctdb_addr_to_str(&ips->ips[i].addr)));
2943 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2944 CTDB_NO_MEMORY(ctdb, pub);
2946 pub->addr = ips->ips[i].addr;
2950 timeout = TAKEOVER_TIMEOUT();
2952 data.dsize = offsetof(struct ctdb_addr_info_old,
2954 data.dptr = (uint8_t *)pub;
2956 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2957 CTDB_CONTROL_DEL_PUBLIC_IP,
2958 0, data, async_data,
2960 if (state == NULL) {
2963 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2967 ctdb_client_async_add(async_data, state);
2971 /* Compare IPs between node and file for IPs to be added */
2973 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2974 for (i = 0; i < ips->num; i++) {
2975 if (ctdb_same_ip(&vnn->public_address,
2976 &ips->ips[i].addr)) {
2977 /* IP already on node */
/* i reaching ips->num means the inner loop found no match for this vnn */
2981 if (i == ips->num) {
2982 /* Add IP ips->ips[i] */
2983 struct ctdb_addr_info_old *pub;
2984 const char *ifaces = NULL;
2989 ("New IP %s configured, adding it\n",
2990 ctdb_addr_to_str(&vnn->public_address)));
/* broadcast this node's pnn on CTDB_SRVID_REBALANCE_NODE to the
   connected nodes; a failure here only produces a warning */
2992 uint32_t pnn = ctdb_get_pnn(ctdb);
2994 data.dsize = sizeof(pnn);
2995 data.dptr = (uint8_t *)&pnn;
2997 ret = ctdb_client_send_message(
2999 CTDB_BROADCAST_CONNECTED,
3000 CTDB_SRVID_REBALANCE_NODE,
3003 DEBUG(DEBUG_WARNING,
3004 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/* build a comma separated list from the entries of vnn->ifaces */
3010 ifaces = vnn->ifaces[0];
3012 while (vnn->ifaces[iface] != NULL) {
/* NOTE(review): each intermediate string is allocated on vnn, and no
   NULL check of the talloc_asprintf() result is visible before it is
   used again and passed to strlen() - confirm */
3013 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3014 vnn->ifaces[iface]);
/* len includes the terminating NUL of the interface list */
3018 len = strlen(ifaces) + 1;
3019 pub = talloc_zero_size(mem_ctx,
3020 offsetof(struct ctdb_addr_info_old, iface) + len);
3021 CTDB_NO_MEMORY(ctdb, pub);
3023 pub->addr = vnn->public_address;
3024 pub->mask = vnn->public_netmask_bits;
3026 memcpy(&pub->iface[0], ifaces, pub->len);
3028 timeout = TAKEOVER_TIMEOUT();
3030 data.dsize = offsetof(struct ctdb_addr_info_old,
3032 data.dptr = (uint8_t *)pub;
3034 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3035 CTDB_CONTROL_ADD_PUBLIC_IP,
3036 0, data, async_data,
3038 if (state == NULL) {
3041 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3045 ctdb_client_async_add(async_data, state);
/* wait for every add/delete control queued above */
3049 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3050 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3054 talloc_free(mem_ctx);
3058 talloc_free(mem_ctx);
3062 /* This control is sent to force the node to re-read the public addresses file
3063 and drop any addresses we should no longer host, and add new addresses
3064 that we are now able to host
/*
 * Control handler that reloads the public addresses configuration.
 *
 * A reload still tracked in ctdb->reload_ips is freed first. A new
 * handle is allocated, a pipe is created and a child is forked.
 *
 * Child: switches to client mode, runs ctdb_reloadips_child(), writes a
 * one byte result to the write end of the pipe and then calls
 * ctdb_wait_for_process_to_exit() with the parent's pid.
 *
 * Parent: takes ownership of the request c, installs
 * ctdb_reloadips_destructor() on the handle, watches the read end of
 * the pipe with ctdb_reloadips_child_handler() and arms a 120 second
 * timer with ctdb_reloadips_timeout_event(). *async_reply is set to
 * true, so the reply is not sent by the caller.
 */
3066 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3068 struct ctdb_reloadips_handle *h;
/* taken before the fork so the child can refer to the parent process */
3069 pid_t parent = getpid();
3071 if (ctdb->reload_ips != NULL) {
3072 talloc_free(ctdb->reload_ips);
3073 ctdb->reload_ips = NULL;
3076 h = talloc(ctdb, struct ctdb_reloadips_handle);
3077 CTDB_NO_MEMORY(ctdb, h);
/* NOTE(review): the message below names ctdb_freeze_lock although this
   pipe is the reloadips result pipe - looks copied from elsewhere */
3082 if (pipe(h->fd) == -1) {
3083 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3088 h->child = ctdb_fork(ctdb);
3089 if (h->child == (pid_t)-1) {
3090 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* child branch */
3098 if (h->child == 0) {
3099 signed char res = 0;
3102 debug_extra = talloc_asprintf(NULL, "reloadips:");
3104 prctl_set_comment("ctdb_reloadips");
3105 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3106 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3109 res = ctdb_reloadips_child(ctdb);
3111 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
/* report the result byte to the parent.
   NOTE(review): the return value of sys_write() is not checked */
3115 sys_write(h->fd[1], &res, 1);
3116 ctdb_wait_for_process_to_exit(parent);
/* parent branch: the handle now owns the request */
3120 h->c = talloc_steal(h, c);
3123 set_close_on_exec(h->fd[0]);
/* from here on, freeing h replies to the control and kills the child
   (see ctdb_reloadips_destructor()) */
3125 talloc_set_destructor(h, ctdb_reloadips_destructor);
3128 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3129 ctdb_reloadips_child_handler, (void *)h);
3130 tevent_fd_set_auto_close(h->fde);
3132 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3133 ctdb_reloadips_timeout_event, h);
3135 /* we reply later */
3136 *async_reply = true;