4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn once) and then walking
118 * ctdb->ifaces once and deleting those not in the tree. Let's go to
119 * one of those if the naive implementation causes problems... :-)
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122 struct ctdb_vnn *vnn)
124 struct ctdb_interface *i, *next;
126 /* For each interface, check if there's an IP using it. */
127 for (i = ctdb->ifaces; i != NULL; i = next) {
132 /* Only consider interfaces named in the given VNN. */
133 if (!vnn_has_interface_with_name(vnn, i->name)) {
137 /* Search for a vnn with this interface. */
139 for (tv=ctdb->vnn; tv; tv=tv->next) {
140 if (vnn_has_interface_with_name(tv, i->name)) {
147 /* None of the VNNs are using this interface. */
148 DLIST_REMOVE(ctdb->ifaces, i);
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
158 struct ctdb_interface *i;
160 for (i=ctdb->ifaces;i;i=i->next) {
161 if (strcmp(i->name, iface) == 0) {
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170 struct ctdb_vnn *vnn)
173 struct ctdb_interface *cur = NULL;
174 struct ctdb_interface *best = NULL;
176 for (i=0; vnn->ifaces[i]; i++) {
178 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
192 if (cur->references < best->references) {
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202 struct ctdb_vnn *vnn)
204 struct ctdb_interface *best = NULL;
207 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208 "still assigned to iface '%s'\n",
209 ctdb_addr_to_str(&vnn->public_address),
210 ctdb_vnn_iface_string(vnn)));
214 best = ctdb_vnn_best_iface(ctdb, vnn);
216 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217 "cannot assign to iface any iface\n",
218 ctdb_addr_to_str(&vnn->public_address)));
224 vnn->pnn = ctdb->pnn;
226 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227 "now assigned to iface '%s' refs[%d]\n",
228 ctdb_addr_to_str(&vnn->public_address),
229 ctdb_vnn_iface_string(vnn),
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238 "now unassigned (old iface '%s' refs[%d])\n",
239 ctdb_addr_to_str(&vnn->public_address),
240 ctdb_vnn_iface_string(vnn),
241 vnn->iface?vnn->iface->references:0));
243 vnn->iface->references--;
246 if (vnn->pnn == ctdb->pnn) {
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252 struct ctdb_vnn *vnn)
256 /* Nodes that are not RUNNING can not host IPs */
257 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
261 if (vnn->delete_pending) {
265 if (vnn->iface && vnn->iface->link_up) {
269 for (i=0; vnn->ifaces[i]; i++) {
270 struct ctdb_interface *cur;
272 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
285 struct ctdb_takeover_arp {
286 struct ctdb_context *ctdb;
289 struct ctdb_tcp_array *tcparray;
290 struct ctdb_vnn *vnn;
295 lists of tcp endpoints
297 struct ctdb_tcp_list {
298 struct ctdb_tcp_list *prev, *next;
299 struct ctdb_connection connection;
303 list of clients to kill on IP release
305 struct ctdb_client_ip {
306 struct ctdb_client_ip *prev, *next;
307 struct ctdb_context *ctdb;
314 send a gratuitous arp
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317 struct tevent_timer *te,
318 struct timeval t, void *private_data)
320 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
321 struct ctdb_takeover_arp);
323 struct ctdb_tcp_array *tcparray;
324 const char *iface = ctdb_vnn_iface_string(arp->vnn);
326 ret = ctdb_sys_send_arp(&arp->addr, iface);
328 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329 iface, strerror(errno)));
332 tcparray = arp->tcparray;
334 for (i=0;i<tcparray->num;i++) {
335 struct ctdb_connection *tcon;
337 tcon = &tcparray->connections[i];
338 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339 (unsigned)ntohs(tcon->dst.ip.sin_port),
340 ctdb_addr_to_str(&tcon->src),
341 (unsigned)ntohs(tcon->src.ip.sin_port)));
342 ret = ctdb_sys_send_tcp(
347 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348 ctdb_addr_to_str(&tcon->src)));
355 if (arp->count == CTDB_ARP_REPEAT) {
360 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362 ctdb_control_send_arp, arp);
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366 struct ctdb_vnn *vnn)
368 struct ctdb_takeover_arp *arp;
369 struct ctdb_tcp_array *tcparray;
371 if (!vnn->takeover_ctx) {
372 vnn->takeover_ctx = talloc_new(vnn);
373 if (!vnn->takeover_ctx) {
378 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
384 arp->addr = vnn->public_address;
387 tcparray = vnn->tcp_array;
389 /* add all of the known tcp connections for this IP to the
390 list of tcp connections to send tickle acks for */
391 arp->tcparray = talloc_steal(arp, tcparray);
393 vnn->tcp_array = NULL;
394 vnn->tcp_update_needed = true;
397 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398 timeval_zero(), ctdb_control_send_arp, arp);
403 struct takeover_callback_state {
404 struct ctdb_req_control_old *c;
405 ctdb_sock_addr *addr;
406 struct ctdb_vnn *vnn;
409 struct ctdb_do_takeip_state {
410 struct ctdb_req_control_old *c;
411 struct ctdb_vnn *vnn;
415 called when takeip event finishes
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
420 struct ctdb_do_takeip_state *state =
421 talloc_get_type(private_data, struct ctdb_do_takeip_state);
426 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
428 if (status == -ETIME) {
431 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
432 ctdb_addr_to_str(&state->vnn->public_address),
433 ctdb_vnn_iface_string(state->vnn)));
434 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
436 node->flags |= NODE_FLAGS_UNHEALTHY;
441 if (ctdb->do_checkpublicip) {
443 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
445 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
452 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
453 data.dsize = strlen((char *)data.dptr) + 1;
454 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
456 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
459 /* the control succeeded */
460 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
465 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
467 state->vnn->update_in_flight = false;
472 take over an ip address
474 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
475 struct ctdb_req_control_old *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_takeip_state *state;
481 if (vnn->update_in_flight) {
482 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
483 "update for this IP already in flight\n",
484 ctdb_addr_to_str(&vnn->public_address),
485 vnn->public_netmask_bits));
489 ret = ctdb_vnn_assign_iface(ctdb, vnn);
491 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
492 "assign a usable interface\n",
493 ctdb_addr_to_str(&vnn->public_address),
494 vnn->public_netmask_bits));
498 state = talloc(vnn, struct ctdb_do_takeip_state);
499 CTDB_NO_MEMORY(ctdb, state);
501 state->c = talloc_steal(ctdb, c);
504 vnn->update_in_flight = true;
505 talloc_set_destructor(state, ctdb_takeip_destructor);
507 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
508 ctdb_addr_to_str(&vnn->public_address),
509 vnn->public_netmask_bits,
510 ctdb_vnn_iface_string(vnn)));
512 ret = ctdb_event_script_callback(ctdb,
514 ctdb_do_takeip_callback,
518 ctdb_vnn_iface_string(vnn),
519 ctdb_addr_to_str(&vnn->public_address),
520 vnn->public_netmask_bits);
523 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
524 ctdb_addr_to_str(&vnn->public_address),
525 ctdb_vnn_iface_string(vnn)));
533 struct ctdb_do_updateip_state {
534 struct ctdb_req_control_old *c;
535 struct ctdb_interface *old;
536 struct ctdb_vnn *vnn;
540 called when updateip event finishes
542 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
545 struct ctdb_do_updateip_state *state =
546 talloc_get_type(private_data, struct ctdb_do_updateip_state);
550 if (status == -ETIME) {
553 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
554 ctdb_addr_to_str(&state->vnn->public_address),
556 ctdb_vnn_iface_string(state->vnn)));
559 * All we can do is reset the old interface
560 * and let the next run fix it
562 ctdb_vnn_unassign_iface(ctdb, state->vnn);
563 state->vnn->iface = state->old;
564 state->vnn->iface->references++;
566 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
571 if (ctdb->do_checkpublicip) {
573 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
575 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
582 /* the control succeeded */
583 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
588 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
590 state->vnn->update_in_flight = false;
595 update (move) an ip address
597 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
598 struct ctdb_req_control_old *c,
599 struct ctdb_vnn *vnn)
602 struct ctdb_do_updateip_state *state;
603 struct ctdb_interface *old = vnn->iface;
604 const char *new_name;
606 if (vnn->update_in_flight) {
607 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
608 "update for this IP already in flight\n",
609 ctdb_addr_to_str(&vnn->public_address),
610 vnn->public_netmask_bits));
614 ctdb_vnn_unassign_iface(ctdb, vnn);
615 ret = ctdb_vnn_assign_iface(ctdb, vnn);
617 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
618 "assin a usable interface (old iface '%s')\n",
619 ctdb_addr_to_str(&vnn->public_address),
620 vnn->public_netmask_bits,
625 new_name = ctdb_vnn_iface_string(vnn);
626 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
627 /* A benign update from one interface onto itself.
628 * no need to run the eventscripts in this case, just return
631 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
635 state = talloc(vnn, struct ctdb_do_updateip_state);
636 CTDB_NO_MEMORY(ctdb, state);
638 state->c = talloc_steal(ctdb, c);
642 vnn->update_in_flight = true;
643 talloc_set_destructor(state, ctdb_updateip_destructor);
645 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
646 "interface %s to %s\n",
647 ctdb_addr_to_str(&vnn->public_address),
648 vnn->public_netmask_bits,
652 ret = ctdb_event_script_callback(ctdb,
654 ctdb_do_updateip_callback,
656 CTDB_EVENT_UPDATE_IP,
660 ctdb_addr_to_str(&vnn->public_address),
661 vnn->public_netmask_bits);
663 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
664 ctdb_addr_to_str(&vnn->public_address),
665 old->name, new_name));
674 Find the vnn that has the given public ip address
675 returns NULL if the address is not known as a public address
677 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
679 struct ctdb_vnn *vnn;
681 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
682 if (ctdb_same_ip(&vnn->public_address, addr)) {
691 take over an ip address
693 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
694 struct ctdb_req_control_old *c,
699 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
700 struct ctdb_vnn *vnn;
701 bool have_ip = false;
702 bool do_updateip = false;
703 bool do_takeip = false;
704 struct ctdb_interface *best_iface = NULL;
706 if (pip->pnn != ctdb->pnn) {
707 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
708 "with pnn %d, but we're node %d\n",
709 ctdb_addr_to_str(&pip->addr),
710 pip->pnn, ctdb->pnn));
714 /* update our vnn list */
715 vnn = find_public_ip_vnn(ctdb, &pip->addr);
717 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
718 ctdb_addr_to_str(&pip->addr)));
722 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
723 have_ip = ctdb_sys_have_ip(&pip->addr);
725 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
726 if (best_iface == NULL) {
727 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
728 "a usable interface (old %s, have_ip %d)\n",
729 ctdb_addr_to_str(&vnn->public_address),
730 vnn->public_netmask_bits,
731 ctdb_vnn_iface_string(vnn),
736 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
737 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
742 if (vnn->iface == NULL && have_ip) {
743 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
744 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
745 ctdb_addr_to_str(&vnn->public_address)));
749 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
750 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
751 "and we have it on iface[%s], but it was assigned to node %d"
752 "and we are node %d, banning ourself\n",
753 ctdb_addr_to_str(&vnn->public_address),
754 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
759 if (vnn->pnn == -1 && have_ip) {
760 vnn->pnn = ctdb->pnn;
761 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762 "and we already have it on iface[%s], update local daemon\n",
763 ctdb_addr_to_str(&vnn->public_address),
764 ctdb_vnn_iface_string(vnn)));
769 if (vnn->iface != best_iface) {
770 if (!vnn->iface->link_up) {
772 } else if (vnn->iface->references > (best_iface->references + 1)) {
773 /* only move when the rebalance gains something */
781 ctdb_vnn_unassign_iface(ctdb, vnn);
788 ret = ctdb_do_takeip(ctdb, c, vnn);
792 } else if (do_updateip) {
793 ret = ctdb_do_updateip(ctdb, c, vnn);
799 * The interface is up and the kernel knows the ip
802 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
809 /* tell ctdb_control.c that we will be replying asynchronously */
815 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
817 DLIST_REMOVE(ctdb->vnn, vnn);
818 ctdb_vnn_unassign_iface(ctdb, vnn);
819 ctdb_remove_orphaned_ifaces(ctdb, vnn);
824 called when releaseip event finishes
826 static void release_ip_callback(struct ctdb_context *ctdb, int status,
829 struct takeover_callback_state *state =
830 talloc_get_type(private_data, struct takeover_callback_state);
833 if (status == -ETIME) {
837 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
838 if (ctdb_sys_have_ip(state->addr)) {
840 ("IP %s still hosted during release IP callback, failing\n",
841 ctdb_addr_to_str(state->addr)));
842 ctdb_request_control_reply(ctdb, state->c,
849 /* send a message to all clients of this node telling them
850 that the cluster has been reconfigured and they should
851 release any sockets on this IP */
852 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
853 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
854 data.dsize = strlen((char *)data.dptr)+1;
856 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
858 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
860 ctdb_vnn_unassign_iface(ctdb, state->vnn);
862 /* Process the IP if it has been marked for deletion */
863 if (state->vnn->delete_pending) {
864 do_delete_ip(ctdb, state->vnn);
868 /* the control succeeded */
869 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
873 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
875 if (state->vnn != NULL) {
876 state->vnn->update_in_flight = false;
882 release an ip address
884 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
885 struct ctdb_req_control_old *c,
890 struct takeover_callback_state *state;
891 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
892 struct ctdb_vnn *vnn;
895 /* update our vnn list */
896 vnn = find_public_ip_vnn(ctdb, &pip->addr);
898 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
899 ctdb_addr_to_str(&pip->addr)));
904 /* stop any previous arps */
905 talloc_free(vnn->takeover_ctx);
906 vnn->takeover_ctx = NULL;
908 /* Some ctdb tool commands (e.g. moveip) send
909 * lazy multicast to drop an IP from any node that isn't the
910 * intended new node. The following makes ctdbd ignore
911 * a release for any address it doesn't host.
913 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
914 if (!ctdb_sys_have_ip(&pip->addr)) {
915 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
916 ctdb_addr_to_str(&pip->addr),
917 vnn->public_netmask_bits,
918 ctdb_vnn_iface_string(vnn)));
919 ctdb_vnn_unassign_iface(ctdb, vnn);
923 if (vnn->iface == NULL) {
924 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
925 ctdb_addr_to_str(&pip->addr),
926 vnn->public_netmask_bits));
931 /* There is a potential race between take_ip and us because we
932 * update the VNN via a callback that runs when the
933 * eventscripts have been run. Avoid the race by allowing one
934 * update to be in flight at a time.
936 if (vnn->update_in_flight) {
937 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
938 "update for this IP already in flight\n",
939 ctdb_addr_to_str(&vnn->public_address),
940 vnn->public_netmask_bits));
944 iface = strdup(ctdb_vnn_iface_string(vnn));
946 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
947 ctdb_addr_to_str(&pip->addr),
948 vnn->public_netmask_bits,
952 state = talloc(ctdb, struct takeover_callback_state);
954 ctdb_set_error(ctdb, "Out of memory at %s:%d",
960 state->c = talloc_steal(state, c);
961 state->addr = talloc(state, ctdb_sock_addr);
962 if (state->addr == NULL) {
963 ctdb_set_error(ctdb, "Out of memory at %s:%d",
969 *state->addr = pip->addr;
972 vnn->update_in_flight = true;
973 talloc_set_destructor(state, ctdb_releaseip_destructor);
975 ret = ctdb_event_script_callback(ctdb,
976 state, release_ip_callback, state,
977 CTDB_EVENT_RELEASE_IP,
980 ctdb_addr_to_str(&pip->addr),
981 vnn->public_netmask_bits);
984 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
985 ctdb_addr_to_str(&pip->addr),
986 ctdb_vnn_iface_string(vnn)));
991 /* tell the control that we will reply asynchronously */
996 static int ctdb_add_public_address(struct ctdb_context *ctdb,
997 ctdb_sock_addr *addr,
998 unsigned mask, const char *ifaces,
1001 struct ctdb_vnn *vnn;
1008 tmp = strdup(ifaces);
1009 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1010 if (!ctdb_sys_check_iface_exists(iface)) {
1011 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1018 /* Verify that we don't have an entry for this ip yet */
1019 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1020 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1021 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1022 ctdb_addr_to_str(addr)));
1027 /* create a new vnn structure for this ip address */
1028 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1029 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1030 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1031 tmp = talloc_strdup(vnn, ifaces);
1032 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1033 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1034 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1035 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1036 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1037 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1041 vnn->ifaces[num] = NULL;
1042 vnn->public_address = *addr;
1043 vnn->public_netmask_bits = mask;
1045 if (check_address) {
1046 if (ctdb_sys_have_ip(addr)) {
1047 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1048 vnn->pnn = ctdb->pnn;
1052 for (i=0; vnn->ifaces[i]; i++) {
1053 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1055 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1056 "for public_address[%s]\n",
1057 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1063 DLIST_ADD(ctdb->vnn, vnn);
1069 setup the public address lists from a file
1071 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1077 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1078 if (lines == NULL) {
1079 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1082 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1086 for (i=0;i<nlines;i++) {
1088 ctdb_sock_addr addr;
1089 const char *addrstr;
1094 while ((*line == ' ') || (*line == '\t')) {
1100 if (strcmp(line, "") == 0) {
1103 tok = strtok(line, " \t");
1105 tok = strtok(NULL, " \t");
1107 if (NULL == ctdb->default_public_interface) {
1108 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1113 ifaces = ctdb->default_public_interface;
1118 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1119 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1123 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1124 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1135 static void *add_ip_callback(void *parm, void *data)
1137 struct public_ip_list *this_ip = parm;
1138 struct public_ip_list *prev_ip = data;
1140 if (prev_ip == NULL) {
1143 if (this_ip->pnn == -1) {
1144 this_ip->pnn = prev_ip->pnn;
1150 static int getips_count_callback(void *param, void *data)
1152 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1153 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1155 new_ip->next = *ip_list;
1160 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1161 struct ctdb_public_ip_list *ips,
1164 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1165 struct ipalloc_state *ipalloc_state,
1166 struct ctdb_node_map_old *nodemap)
1170 struct ctdb_public_ip_list_old *ip_list;
1172 if (ipalloc_state->num != nodemap->num) {
1175 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1176 ipalloc_state->num, nodemap->num));
1180 for (j=0; j<nodemap->num; j++) {
1181 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1185 /* Retrieve the list of known public IPs from the node */
1186 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1189 ipalloc_state->known_public_ips,
1194 ("Failed to read known public IPs from node: %u\n",
1198 ipalloc_state->known_public_ips[j].num = ip_list->num;
1199 /* This could be copied and freed. However, ip_list
1200 * is allocated off ipalloc_state->known_public_ips,
1201 * so this is a safe hack. This will go away in a
1202 * while anyway... */
1203 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1205 if (ctdb->do_checkpublicip) {
1206 verify_remote_ip_allocation(
1208 &ipalloc_state->known_public_ips[j],
1212 /* Retrieve the list of available public IPs from the node */
1213 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1216 ipalloc_state->available_public_ips,
1217 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1221 ("Failed to read available public IPs from node: %u\n",
1225 ipalloc_state->available_public_ips[j].num = ip_list->num;
1226 /* This could be copied and freed. However, ip_list
1227 * is allocated off ipalloc_state->available_public_ips,
1228 * so this is a safe hack. This will go away in a
1229 * while anyway... */
1230 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1236 static struct public_ip_list *
1237 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1240 struct public_ip_list *ip_list;
1241 struct ctdb_public_ip_list *public_ips;
1243 TALLOC_FREE(ctdb->ip_tree);
1244 ctdb->ip_tree = trbt_create(ctdb, 0);
1246 for (i=0; i < ctdb->num_nodes; i++) {
1248 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1252 /* there were no public ips for this node */
1253 if (ipalloc_state->known_public_ips == NULL) {
1257 public_ips = &ipalloc_state->known_public_ips[i];
1259 for (j=0; j < public_ips->num; j++) {
1260 struct public_ip_list *tmp_ip;
1262 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1263 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1264 /* Do not use information about IP addresses hosted
1265 * on other nodes, it may not be accurate */
1266 if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1267 tmp_ip->pnn = public_ips->ip[j].pnn;
1271 tmp_ip->addr = public_ips->ip[j].addr;
1272 tmp_ip->next = NULL;
1274 trbt_insertarray32_callback(ctdb->ip_tree,
1275 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1282 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1287 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1291 for (i=0;i<nodemap->num;i++) {
1292 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1293 /* Found one completely healthy node */
1301 struct get_tunable_callback_data {
1302 const char *tunable;
1307 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1308 int32_t res, TDB_DATA outdata,
1311 struct get_tunable_callback_data *cd =
1312 (struct get_tunable_callback_data *)callback;
1316 /* Already handled in fail callback */
1320 if (outdata.dsize != sizeof(uint32_t)) {
1321 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1322 cd->tunable, pnn, (int)sizeof(uint32_t),
1323 (int)outdata.dsize));
1328 size = talloc_array_length(cd->out);
1330 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1331 cd->tunable, pnn, size));
1336 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1339 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1340 int32_t res, TDB_DATA outdata,
1343 struct get_tunable_callback_data *cd =
1344 (struct get_tunable_callback_data *)callback;
1349 ("Timed out getting tunable \"%s\" from node %d\n",
1355 DEBUG(DEBUG_WARNING,
1356 ("Tunable \"%s\" not implemented on node %d\n",
1361 ("Unexpected error getting tunable \"%s\" from node %d\n",
1367 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1368 TALLOC_CTX *tmp_ctx,
1369 struct ctdb_node_map_old *nodemap,
1370 const char *tunable,
1371 uint32_t default_value)
1374 struct ctdb_control_get_tunable *t;
1377 struct get_tunable_callback_data callback_data;
1380 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1381 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1382 for (i=0; i<nodemap->num; i++) {
1383 tvals[i] = default_value;
1386 callback_data.out = tvals;
1387 callback_data.tunable = tunable;
1388 callback_data.fatal = false;
1390 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1391 data.dptr = talloc_size(tmp_ctx, data.dsize);
1392 t = (struct ctdb_control_get_tunable *)data.dptr;
1393 t->length = strlen(tunable)+1;
1394 memcpy(t->name, tunable, t->length);
1395 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1396 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1397 nodes, 0, TAKEOVER_TIMEOUT(),
1399 get_tunable_callback,
1400 get_tunable_fail_callback,
1401 &callback_data) != 0) {
1402 if (callback_data.fatal) {
1408 talloc_free(data.dptr);
1413 /* Set internal flags for IP allocation:
1415 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1416 * Set NOIPHOST ip flag for each INACTIVE node
1417 * if all nodes are disabled:
1418 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1420 * Set NOIPHOST ip flags for disabled nodes
1422 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1423 struct ctdb_node_map_old *nodemap,
1424 uint32_t *tval_noiptakeover,
1425 uint32_t *tval_noiphostonalldisabled)
1429 for (i=0;i<nodemap->num;i++) {
1430 /* Can not take IPs on node with NoIPTakeover set */
1431 if (tval_noiptakeover[i] != 0) {
1432 ipalloc_state->noiptakeover[i] = true;
1435 /* Can not host IPs on INACTIVE node */
1436 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1437 ipalloc_state->noiphost[i] = true;
1441 if (all_nodes_are_disabled(nodemap)) {
1442 /* If all nodes are disabled, can not host IPs on node
1443 * with NoIPHostOnAllDisabled set
1445 for (i=0;i<nodemap->num;i++) {
1446 if (tval_noiphostonalldisabled[i] != 0) {
1447 ipalloc_state->noiphost[i] = true;
1451 /* If some nodes are not disabled, then can not host
1452 * IPs on DISABLED node
1454 for (i=0;i<nodemap->num;i++) {
1455 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1456 ipalloc_state->noiphost[i] = true;
1462 static bool set_ipflags(struct ctdb_context *ctdb,
1463 struct ipalloc_state *ipalloc_state,
1464 struct ctdb_node_map_old *nodemap)
1466 uint32_t *tval_noiptakeover;
1467 uint32_t *tval_noiphostonalldisabled;
1469 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1471 if (tval_noiptakeover == NULL) {
1475 tval_noiphostonalldisabled =
1476 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1477 "NoIPHostOnAllDisabled", 0);
1478 if (tval_noiphostonalldisabled == NULL) {
1479 /* Caller frees tmp_ctx */
1483 set_ipflags_internal(ipalloc_state, nodemap,
1485 tval_noiphostonalldisabled);
1487 talloc_free(tval_noiptakeover);
1488 talloc_free(tval_noiphostonalldisabled);
1493 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1494 TALLOC_CTX *mem_ctx)
1496 struct ipalloc_state *ipalloc_state =
1497 talloc_zero(mem_ctx, struct ipalloc_state);
1498 if (ipalloc_state == NULL) {
1499 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1503 ipalloc_state->num = ctdb->num_nodes;
1505 ipalloc_state->known_public_ips =
1506 talloc_zero_array(ipalloc_state,
1507 struct ctdb_public_ip_list,
1508 ipalloc_state->num);
1509 if (ipalloc_state->known_public_ips == NULL) {
1510 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1514 ipalloc_state->available_public_ips =
1515 talloc_zero_array(ipalloc_state,
1516 struct ctdb_public_ip_list,
1517 ipalloc_state->num);
1518 if (ipalloc_state->available_public_ips == NULL) {
1519 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1522 ipalloc_state->noiptakeover =
1523 talloc_zero_array(ipalloc_state,
1525 ipalloc_state->num);
1526 if (ipalloc_state->noiptakeover == NULL) {
1527 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1530 ipalloc_state->noiphost =
1531 talloc_zero_array(ipalloc_state,
1533 ipalloc_state->num);
1534 if (ipalloc_state->noiphost == NULL) {
1535 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1539 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1540 ipalloc_state->algorithm = IPALLOC_LCP2;
1541 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1542 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1544 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1547 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1549 return ipalloc_state;
1551 talloc_free(ipalloc_state);
/* Private data handed to takeover_run_fail_callback(): it wraps the
 * caller's failure callback and that callback's private data.
 * NOTE(review): the num_nodes and node_failed members used by
 * takeover_callback_data_init() and takeover_run_fail_callback() are
 * not visible in this listing. */
1555 struct takeover_callback_data {
1558 client_async_callback fail_callback;
1559 void *fail_callback_data;
1562 static struct takeover_callback_data *
1563 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1565 client_async_callback fail_callback,
1566 void *callback_data)
1568 static struct takeover_callback_data *takeover_data;
1570 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1571 if (takeover_data == NULL) {
1572 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1576 takeover_data->node_failed = talloc_zero_array(takeover_data,
1578 if (takeover_data->node_failed == NULL) {
1579 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1580 talloc_free(takeover_data);
1584 takeover_data->num_nodes = num_nodes;
1585 takeover_data->fail_callback = fail_callback;
1586 takeover_data->fail_callback_data = callback_data;
1588 return takeover_data;
1591 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1592 uint32_t node_pnn, int32_t res,
1593 TDB_DATA outdata, void *callback_data)
1595 struct takeover_callback_data *cd =
1596 talloc_get_type_abort(callback_data,
1597 struct takeover_callback_data);
1599 if (node_pnn >= cd->num_nodes) {
1600 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1604 if (!cd->node_failed[node_pnn]) {
1605 cd->node_failed[node_pnn] = true;
1606 cd->fail_callback(ctdb, node_pnn, res, outdata,
1607 cd->fail_callback_data);
1612 * Recalculate the allocation of public IPs to nodes and have the
1613 * nodes host their allocated addresses.
1615 * - Allocate memory for IP allocation state, including per node
1617 * - Populate IP allocation algorithm in IP allocation state
1618 * - Populate local value of tunable NoIPFailback in IP allocation
1619 state - this is really a cluster-wide configuration variable and
1620 only the value from the master node is used
1621 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1622 * connected nodes - this is done separately so tunable values can
1623 * be faked in unit testing
1624 * - Populate NoIPTakeover tunable in IP allocation state
1625 * - Populate NoIPHost in IP allocation state, derived from node flags
1626 * and NoIPHostOnAllDisabled tunable
1627 * - Retrieve and populate known and available IP lists in IP
1629 * - If no available IP addresses then early exit
1630 * - Build list of (known IPs, currently assigned node)
1631 * - Populate list of nodes to force rebalance - internal structure,
1632 * currently no way to fetch, only used by LCP2 for nodes that have
1633 * had new IP addresses added
1634 * - Run IP allocation algorithm
1635 * - Send RELEASE_IP to all nodes for IPs they should not host
1636 * - Send TAKE_IP to all nodes for IPs they should host
1637 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Entry point for a takeover run.
 *
 * ctdb                  - daemon context; also the parent of the temporary
 *                         talloc context that owns everything allocated here
 * nodemap               - node map used to decide which nodes are contacted
 * force_rebalance_nodes - stored in the IP allocation state before
 *                         ipalloc() is called
 * fail_callback,
 * callback_data         - registered as the failure callback (and its
 *                         private data) for the async controls sent here;
 *                         for RELEASE_IP they are wrapped by
 *                         takeover_run_fail_callback()
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably 0 on success and non-zero on failure - confirm.
 */
1639 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1640 uint32_t *force_rebalance_nodes,
1641 client_async_callback fail_callback, void *callback_data)
1644 struct ctdb_public_ip ip;
1646 struct public_ip_list *all_ips, *tmp_ip;
1648 struct timeval timeout;
1649 struct client_async_data *async_data;
1650 struct ctdb_client_control_state *state;
1651 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1652 struct ipalloc_state *ipalloc_state;
1653 struct takeover_callback_data *takeover_data;
1657 * ip failover is completely disabled, just send out the
1658 * ipreallocated event.
1660 if (ctdb->tunable.disable_ip_failover != 0) {
1664 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1665 if (ipalloc_state == NULL) {
1666 talloc_free(tmp_ctx);
1670 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1671 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1672 talloc_free(tmp_ctx);
1676 /* Fetch known/available public IPs from each active node */
1677 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1679 talloc_free(tmp_ctx);
1683 /* Short-circuit IP allocation if no node has available IPs */
1684 can_host_ips = false;
1685 for (i=0; i < ipalloc_state->num; i++) {
1686 if (ipalloc_state->available_public_ips[i].num != 0) {
1687 can_host_ips = true;
1690 if (!can_host_ips) {
1691 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1695 /* since nodes only know about those public addresses that
1696 can be served by that particular node, no single node has
1697 a full list of all public addresses that exist in the cluster.
1698 Walk over all node structures and create a merged list of
1699 all public addresses that exist in the cluster.
1701 keep the tree of ips around as ctdb->ip_tree
1703 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1704 ipalloc_state->all_ips = all_ips;
1706 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1708 /* Do the IP reassignment calculations */
1709 ipalloc(ipalloc_state);
1711 /* Now tell all nodes to release any public IPs should not
1712 * host. This will be a NOOP on nodes that don't currently
1713 * hold the given IP.
1715 takeover_data = takeover_callback_data_init(tmp_ctx,
1719 if (takeover_data == NULL) {
1720 talloc_free(tmp_ctx);
1724 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1725 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
/* RELEASE_IP failures go through the wrapper, which forwards at most
 * one failure per node to the caller's callback */
1727 async_data->fail_callback = takeover_run_fail_callback;
1728 async_data->callback_data = takeover_data;
1730 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1732 /* Send a RELEASE_IP to all nodes that should not be hosting
1733 * each IP. For each IP, all but one of these will be
1734 * redundant. However, the redundant ones are used to tell
1735 * nodes which node should be hosting the IP so that commands
1736 * like "ctdb ip" can display a particular nodes idea of who
1737 * is hosting what. */
1738 for (i=0;i<nodemap->num;i++) {
1739 /* don't talk to unconnected nodes, but do talk to banned nodes */
1740 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1744 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1745 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1746 /* This node should be serving this
1747 vnn so don't tell it to release the ip
1751 ip.pnn = tmp_ip->pnn;
1752 ip.addr = tmp_ip->addr;
1754 timeout = TAKEOVER_TIMEOUT();
1755 data.dsize = sizeof(ip);
1756 data.dptr = (uint8_t *)&ip;
1757 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1758 0, CTDB_CONTROL_RELEASE_IP, 0,
1761 if (state == NULL) {
1762 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1763 talloc_free(tmp_ctx);
1767 ctdb_client_async_add(async_data, state);
1770 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1771 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1772 talloc_free(tmp_ctx);
1775 talloc_free(async_data);
1778 /* For each IP, send a TAKOVER_IP to the node that should be
1779 * hosting it. Many of these will often be redundant (since
1780 * the allocation won't have changed) but they can be useful
1781 * to recover from inconsistencies. */
1782 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1783 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
/* For TAKEOVER_IP the caller's callback is registered directly,
 * without the once-per-node wrapper used for RELEASE_IP above */
1785 async_data->fail_callback = fail_callback;
1786 async_data->callback_data = callback_data;
1788 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1789 if (tmp_ip->pnn == -1) {
1790 /* this IP won't be taken over */
1794 ip.pnn = tmp_ip->pnn;
1795 ip.addr = tmp_ip->addr;
1797 timeout = TAKEOVER_TIMEOUT();
1798 data.dsize = sizeof(ip);
1799 data.dptr = (uint8_t *)&ip;
1800 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1801 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1802 data, async_data, &timeout, NULL);
1803 if (state == NULL) {
1804 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1805 talloc_free(tmp_ctx);
1809 ctdb_client_async_add(async_data, state);
1811 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1812 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1813 talloc_free(tmp_ctx);
1819 * Tell all nodes to run eventscripts to process the
1820 * "ipreallocated" event. This can do a lot of things,
1821 * including restarting services to reconfigure them if public
1822 * IPs have moved. Once upon a time this event only used to
1825 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1826 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1827 nodes, 0, TAKEOVER_TIMEOUT(),
1829 NULL, fail_callback,
1833 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1836 talloc_free(tmp_ctx);
1842 destroy a ctdb_client_ip structure
1844 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1846 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1847 ctdb_addr_to_str(&ip->addr),
1848 ntohs(ip->addr.ip.sin_port),
1851 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1856 called by a client to inform us of a TCP connection that it is managing
1857 that should tickled with an ACK when IP takeover is done
1859 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1862 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1863 struct ctdb_connection *tcp_sock = NULL;
1864 struct ctdb_tcp_list *tcp;
1865 struct ctdb_connection t;
1868 struct ctdb_client_ip *ip;
1869 struct ctdb_vnn *vnn;
1870 ctdb_sock_addr addr;
1872 /* If we don't have public IPs, tickles are useless */
1873 if (ctdb->vnn == NULL) {
1877 tcp_sock = (struct ctdb_connection *)indata.dptr;
1879 addr = tcp_sock->src;
1880 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1881 addr = tcp_sock->dst;
1882 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1885 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1886 vnn = find_public_ip_vnn(ctdb, &addr);
1888 switch (addr.sa.sa_family) {
1890 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1891 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1892 ctdb_addr_to_str(&addr)));
1896 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1897 ctdb_addr_to_str(&addr)));
1900 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1906 if (vnn->pnn != ctdb->pnn) {
1907 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1908 ctdb_addr_to_str(&addr),
1909 client_id, client->pid));
1910 /* failing this call will tell smbd to die */
1914 ip = talloc(client, struct ctdb_client_ip);
1915 CTDB_NO_MEMORY(ctdb, ip);
1919 ip->client_id = client_id;
1920 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1921 DLIST_ADD(ctdb->client_ip_list, ip);
1923 tcp = talloc(client, struct ctdb_tcp_list);
1924 CTDB_NO_MEMORY(ctdb, tcp);
1926 tcp->connection.src = tcp_sock->src;
1927 tcp->connection.dst = tcp_sock->dst;
1929 DLIST_ADD(client->tcp_list, tcp);
1931 t.src = tcp_sock->src;
1932 t.dst = tcp_sock->dst;
1934 data.dptr = (uint8_t *)&t;
1935 data.dsize = sizeof(t);
1937 switch (addr.sa.sa_family) {
1939 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1940 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1941 ctdb_addr_to_str(&tcp_sock->src),
1942 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1945 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1946 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1947 ctdb_addr_to_str(&tcp_sock->src),
1948 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1951 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1955 /* tell all nodes about this tcp connection */
1956 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1957 CTDB_CONTROL_TCP_ADD,
1958 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1960 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1968 find a tcp address on a list
1970 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1971 struct ctdb_connection *tcp)
1975 if (array == NULL) {
1979 for (i=0;i<array->num;i++) {
1980 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1981 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1982 return &array->connections[i];
1991 called by a daemon to inform us of a TCP connection that one of its
1992 clients managing that should tickled with an ACK when IP takeover is
1995 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1997 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1998 struct ctdb_tcp_array *tcparray;
1999 struct ctdb_connection tcp;
2000 struct ctdb_vnn *vnn;
2002 /* If we don't have public IPs, tickles are useless */
2003 if (ctdb->vnn == NULL) {
2007 vnn = find_public_ip_vnn(ctdb, &p->dst);
2009 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2010 ctdb_addr_to_str(&p->dst)));
2016 tcparray = vnn->tcp_array;
2018 /* If this is the first tickle */
2019 if (tcparray == NULL) {
2020 tcparray = talloc(vnn, struct ctdb_tcp_array);
2021 CTDB_NO_MEMORY(ctdb, tcparray);
2022 vnn->tcp_array = tcparray;
2025 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2026 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2028 tcparray->connections[tcparray->num].src = p->src;
2029 tcparray->connections[tcparray->num].dst = p->dst;
2032 if (tcp_update_needed) {
2033 vnn->tcp_update_needed = true;
2039 /* Do we already have this tickle ?*/
2042 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2043 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2044 ctdb_addr_to_str(&tcp.dst),
2045 ntohs(tcp.dst.ip.sin_port),
2050 /* A new tickle, we must add it to the array */
2051 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2052 struct ctdb_connection,
2054 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2056 tcparray->connections[tcparray->num].src = p->src;
2057 tcparray->connections[tcparray->num].dst = p->dst;
2060 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2061 ctdb_addr_to_str(&tcp.dst),
2062 ntohs(tcp.dst.ip.sin_port),
2065 if (tcp_update_needed) {
2066 vnn->tcp_update_needed = true;
/*
 * Remove conn from the tickle array of vnn.
 *
 * Nothing is done (beyond an INFO log) if the vnn has no tickle array
 * or the connection is not in it.  Otherwise the matching slot is
 * overwritten with the last entry and the count is decremented; when
 * the count reaches zero the whole array is freed.  After a removal
 * the vnn is flagged as needing a tickle update.
 */
2073 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2075 struct ctdb_connection *tcpp;
2081 /* if the array is empty we cant remove it
2082 and we don't need to do anything
2084 if (vnn->tcp_array == NULL) {
2085 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2086 ctdb_addr_to_str(&conn->dst),
2087 ntohs(conn->dst.ip.sin_port)));
2092 /* See if we know this connection
2093 if we don't know this connection then we dont need to do anything
2095 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2097 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2098 ctdb_addr_to_str(&conn->dst),
2099 ntohs(conn->dst.ip.sin_port)));
2104 /* We need to remove this entry from the array.
2105 Instead of allocating a new array and copying data to it
2106 we cheat and just copy the last entry in the existing array
2107 to the entry that is to be removed and just shring the
2110 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2111 vnn->tcp_array->num--;
2113 /* If we deleted the last entry we also need to remove the entire array
2115 if (vnn->tcp_array->num == 0) {
2116 talloc_free(vnn->tcp_array);
2117 vnn->tcp_array = NULL;
2120 vnn->tcp_update_needed = true;
2122 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2123 ctdb_addr_to_str(&conn->src),
2124 ntohs(conn->src.ip.sin_port)));
2129 called by a daemon to inform us of a TCP connection that one of its
2130 clients used are no longer needed in the tickle database
/*
 * Control handler: indata carries a struct ctdb_connection.  Looks up
 * the vnn for the connection's destination address and removes the
 * connection from that vnn's tickle array with
 * ctdb_remove_connection().  Does nothing when no public IPs are
 * configured.
 * NOTE(review): the return values are not visible in this listing.
 */
2132 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2134 struct ctdb_vnn *vnn;
2135 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2137 /* If we don't have public IPs, tickles are useless */
2138 if (ctdb->vnn == NULL) {
2142 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2145 (__location__ " unable to find public address %s\n",
2146 ctdb_addr_to_str(&conn->dst)));
2150 ctdb_remove_connection(vnn, conn);
2157 Called when another daemon starts - causes all tickles for all
2158 public addresses we are serving to be sent to the new node on the
2159 next check. This actually causes the next scheduled call to
2160 tdb_update_tcp_tickles() to update all nodes. This is simple and
2161 doesn't require careful error handling.
2163 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2165 struct ctdb_vnn *vnn;
2167 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2168 (unsigned long) pnn));
2170 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2171 vnn->tcp_update_needed = true;
2179 called when a client structure goes away - hook to remove
2180 elements from the tcp_list in all daemons
/*
 * Drain client->tcp_list.  Each entry is unlinked from the list and
 * its destination address is looked up as a public address; the
 * connection is removed from the vnn's tickle array only when this
 * node currently hosts that address (vnn->pnn == ctdb->pnn).
 */
2182 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2184 while (client->tcp_list) {
2185 struct ctdb_vnn *vnn;
2186 struct ctdb_tcp_list *tcp = client->tcp_list;
2187 struct ctdb_connection *conn = &tcp->connection;
2189 DLIST_REMOVE(client->tcp_list, tcp);
2191 vnn = find_public_ip_vnn(client->ctdb,
2195 (__location__ " unable to find public address %s\n",
2196 ctdb_addr_to_str(&conn->dst)));
2200 /* If the IP address is hosted on this node then
2201 * remove the connection. */
2202 if (vnn->pnn == client->ctdb->pnn) {
2203 ctdb_remove_connection(vnn, conn);
2206 /* Otherwise this function has been called because the
2207 * server IP address has been released to another node
2208 * and the client has exited. This means that we
2209 * should not delete the connection information. The
2210 * takeover node processes connections too. */
/*
 * Release the public addresses configured on this node.
 *
 * Walks every vnn.  Addresses the system does not currently have
 * (per ctdb_sys_have_ip) only get their interface unassigned.  For the
 * others the "releaseip" event script is run, a CTDB_SRVID_RELEASE_IP
 * message carrying the address string is sent to this node, and the
 * interface is unassigned.  A vnn with update_in_flight already set is
 * skipped with a warning.  The number of released addresses is logged
 * at the end.  Checks the disable_ip_failover tunable first.
 */
2215 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2217 struct ctdb_vnn *vnn;
2221 if (ctdb->tunable.disable_ip_failover == 1) {
2225 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2226 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2227 ctdb_vnn_unassign_iface(ctdb, vnn);
2234 /* Don't allow multiple releases at once. Some code,
2235 * particularly ctdb_tickle_sentenced_connections() is
2237 if (vnn->update_in_flight) {
2238 DEBUG(DEBUG_WARNING,
2240 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2241 ctdb_addr_to_str(&vnn->public_address),
2242 vnn->public_netmask_bits,
2243 ctdb_vnn_iface_string(vnn)));
2246 vnn->update_in_flight = true;
2248 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2249 ctdb_addr_to_str(&vnn->public_address),
2250 vnn->public_netmask_bits,
2251 ctdb_vnn_iface_string(vnn)));
2253 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2254 ctdb_vnn_iface_string(vnn),
2255 ctdb_addr_to_str(&vnn->public_address),
2256 vnn->public_netmask_bits);
/* The message payload is the NUL-terminated address string; the copy
 * is freed again right after sending */
2258 data.dptr = (uint8_t *)talloc_strdup(
2259 vnn, ctdb_addr_to_str(&vnn->public_address));
2260 if (data.dptr != NULL) {
2261 data.dsize = strlen((char *)data.dptr) + 1;
2262 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2263 CTDB_SRVID_RELEASE_IP, data);
2264 talloc_free(data.dptr);
2267 ctdb_vnn_unassign_iface(ctdb, vnn);
2268 vnn->update_in_flight = false;
2272 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2277 get list of public IPs
/*
 * Control handler returning the public addresses known to this node.
 *
 * The reply is a struct ctdb_public_ip_list_old allocated off outdata
 * with one (pnn, addr) entry per vnn.  When the request carries
 * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, vnns for which
 * ctdb_vnn_available() is false are left out.  The buffer is sized for
 * all vnns, but the reported dsize covers only the entries actually
 * filled in.
 */
2279 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2280 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2283 struct ctdb_public_ip_list_old *ips;
2284 struct ctdb_vnn *vnn;
2285 bool only_available = false;
2287 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2288 only_available = true;
2291 /* count how many public ip structures we have */
2293 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2297 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2298 num*sizeof(struct ctdb_public_ip);
2299 ips = talloc_zero_size(outdata, len);
2300 CTDB_NO_MEMORY(ctdb, ips);
2303 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2304 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2307 ips->ips[i].pnn = vnn->pnn;
2308 ips->ips[i].addr = vnn->public_address;
/* Recompute the length from the number of entries written (i), which
 * can be smaller than the number allocated for */
2312 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2313 i*sizeof(struct ctdb_public_ip);
2315 outdata->dsize = len;
2316 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler returning details for one public address.
 *
 * indata carries the ctdb_sock_addr to look up.  The reply is a
 * struct ctdb_public_ip_info_old allocated off outdata, holding the
 * address, its pnn and one entry (name, link state, reference count)
 * per interface listed in vnn->ifaces.  active_idx is the index of the
 * interface currently assigned to the vnn, or 0xFFFFFFFF if none of
 * the listed interfaces is.
 * NOTE(review): the return values are not visible in this listing.
 */
2322 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2323 struct ctdb_req_control_old *c,
2328 ctdb_sock_addr *addr;
2329 struct ctdb_public_ip_info_old *info;
2330 struct ctdb_vnn *vnn;
2332 addr = (ctdb_sock_addr *)indata.dptr;
2334 vnn = find_public_ip_vnn(ctdb, addr);
2336 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2337 "'%s'not a public address\n",
2338 ctdb_addr_to_str(addr)));
2342 /* count how many public ip structures we have */
2344 for (;vnn->ifaces[num];) {
2348 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2349 num*sizeof(struct ctdb_iface);
2350 info = talloc_zero_size(outdata, len);
2351 CTDB_NO_MEMORY(ctdb, info);
2353 info->ip.addr = vnn->public_address;
2354 info->ip.pnn = vnn->pnn;
2355 info->active_idx = 0xFFFFFFFF;
2357 for (i=0; vnn->ifaces[i]; i++) {
2358 struct ctdb_interface *cur;
2360 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2362 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2366 if (vnn->iface == cur) {
2367 info->active_idx = i;
/* strncpy() does not terminate on truncation, hence the explicit
 * terminator written on the line after the copy */
2369 strncpy(info->ifaces[i].name, cur->name,
2370 sizeof(info->ifaces[i].name));
2371 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2372 info->ifaces[i].link_state = cur->link_up;
2373 info->ifaces[i].references = cur->references;
2376 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2377 i*sizeof(struct ctdb_iface);
2379 outdata->dsize = len;
2380 outdata->dptr = (uint8_t *)info;
/*
 * Control handler returning the interfaces known to this node.
 *
 * The reply is a struct ctdb_iface_list_old allocated off outdata with
 * one entry (name, link state, reference count) per element of
 * ctdb->ifaces.  Names are copied with strncpy() and then explicitly
 * NUL-terminated.
 */
2385 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2386 struct ctdb_req_control_old *c,
2390 struct ctdb_iface_list_old *ifaces;
2391 struct ctdb_interface *cur;
2393 /* count how many public ip structures we have */
2395 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2399 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2400 num*sizeof(struct ctdb_iface);
2401 ifaces = talloc_zero_size(outdata, len);
2402 CTDB_NO_MEMORY(ctdb, ifaces);
2405 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2406 strncpy(ifaces->ifaces[i].name, cur->name,
2407 sizeof(ifaces->ifaces[i].name));
2408 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2409 ifaces->ifaces[i].link_state = cur->link_up;
2410 ifaces->ifaces[i].references = cur->references;
2414 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2415 i*sizeof(struct ctdb_iface);
2417 outdata->dsize = len;
2418 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler that sets the link state of one interface.
 *
 * indata carries a struct ctdb_iface.  The request is validated first:
 * the name must be NUL-terminated at index CTDB_IFACE_SIZE, link_state
 * must be one of the values accepted by the switch, and references
 * must be 0.  The interface is then looked up by name; if its link
 * state actually changes, the change is logged (at ERR level when the
 * link was up before, NOTICE otherwise) and stored.
 * NOTE(review): the return values are not visible in this listing.
 */
2423 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2424 struct ctdb_req_control_old *c,
2427 struct ctdb_iface *info;
2428 struct ctdb_interface *iface;
2429 bool link_up = false;
2431 info = (struct ctdb_iface *)indata.dptr;
2433 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2434 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2435 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2436 len, len, info->name));
2440 switch (info->link_state) {
2448 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2449 (unsigned int)info->link_state));
2453 if (info->references != 0) {
2454 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2455 (unsigned int)info->references));
2459 iface = ctdb_find_iface(ctdb, info->name);
2460 if (iface == NULL) {
2464 if (link_up == iface->link_up) {
2468 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2469 ("iface[%s] has changed it's link status %s => %s\n",
2471 iface->link_up?"up":"down",
2472 link_up?"up":"down"));
2474 iface->link_up = link_up;
2480 called by a daemon to inform us of the entire list of TCP tickles for
2481 a particular public address.
2482 this control should only be sent by the node that is currently serving
2483 that public address.
/*
 * Control handler that replaces the tickle list of one public address.
 *
 * indata carries a struct ctdb_tickle_list_old.  Its size is checked
 * twice: it must cover the fixed header, and then header plus
 * list->num connections.  The update is ignored when this node itself
 * hosts the address.  Otherwise any existing tickle array of the vnn
 * is freed and replaced by a copy of the received connections.
 *
 * NOTE(review): if allocating the connections array fails, the new
 * tcparray (a talloc child of vnn) does not appear to be freed before
 * returning - confirm against CTDB_NO_MEMORY.
 */
2485 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2487 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2488 struct ctdb_tcp_array *tcparray;
2489 struct ctdb_vnn *vnn;
2491 /* We must at least have tickles.num or else we cant verify the size
2492 of the received data blob
2494 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2495 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2499 /* verify that the size of data matches what we expect */
2500 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2501 + sizeof(struct ctdb_connection) * list->num) {
2502 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2506 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2507 ctdb_addr_to_str(&list->addr)));
2509 vnn = find_public_ip_vnn(ctdb, &list->addr);
2511 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2512 ctdb_addr_to_str(&list->addr)));
2517 if (vnn->pnn == ctdb->pnn) {
2519 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2520 ctdb_addr_to_str(&list->addr)));
2524 /* remove any old ticklelist we might have */
2525 talloc_free(vnn->tcp_array);
2526 vnn->tcp_array = NULL;
2528 tcparray = talloc(vnn, struct ctdb_tcp_array);
2529 CTDB_NO_MEMORY(ctdb, tcparray);
2531 tcparray->num = list->num;
2533 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2534 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2536 memcpy(tcparray->connections, &list->connections[0],
2537 sizeof(struct ctdb_connection)*tcparray->num);
2539 /* We now have a new fresh tickle list array for this vnn */
2540 vnn->tcp_array = tcparray;
2546 called to return the full list of tickles for the public address associated
2547 with the provided vnn
/*
 * Control handler returning the tickle list of one public address.
 *
 * indata carries the ctdb_sock_addr to look up; its port is extracted
 * with ctdb_addr_to_port() and compared against the destination port
 * of each stored connection.  The reply is a
 * struct ctdb_tickle_list_old allocated off outdata and sized for the
 * number of connections counted in the first pass; the second pass
 * copies the matching connections into it.
 * NOTE(review): the condition that selects "all connections" instead
 * of per-port counting is not visible in this listing (presumably a
 * port of 0) - confirm.
 */
2549 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2551 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2552 struct ctdb_tickle_list_old *list;
2553 struct ctdb_tcp_array *tcparray;
2555 struct ctdb_vnn *vnn;
2558 vnn = find_public_ip_vnn(ctdb, addr);
2560 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2561 ctdb_addr_to_str(addr)));
2566 port = ctdb_addr_to_port(addr);
2568 tcparray = vnn->tcp_array;
2570 if (tcparray != NULL) {
2572 /* All connections */
2573 num = tcparray->num;
2575 /* Count connections for port */
2576 for (i = 0; i < tcparray->num; i++) {
2577 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2584 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2585 + sizeof(struct ctdb_connection) * num;
2587 outdata->dptr = talloc_size(outdata, outdata->dsize);
2588 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2589 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2599 for (i = 0; i < tcparray->num; i++) {
2601 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2602 list->connections[num] = tcparray->connections[i];
2612 set the list of all tcp tickles for a public address
/*
 * Broadcast the tickle list of one public address to all nodes.
 *
 * Builds a struct ctdb_tickle_list_old in a temporary buffer allocated
 * off ctdb, copies the connections from tcparray into it and sends it
 * as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control to CTDB_BROADCAST_ALL
 * with CTDB_CTRL_FLAG_NOREPLY.  The temporary buffer is freed after
 * sending.
 * NOTE(review): how a NULL tcparray is handled and what is returned
 * are not visible in this listing.
 */
2614 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2615 ctdb_sock_addr *addr,
2616 struct ctdb_tcp_array *tcparray)
2620 struct ctdb_tickle_list_old *list;
2623 num = tcparray->num;
2628 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2629 sizeof(struct ctdb_connection) * num;
2630 data.dptr = talloc_size(ctdb, data.dsize);
2631 CTDB_NO_MEMORY(ctdb, data.dptr);
2633 list = (struct ctdb_tickle_list_old *)data.dptr;
2637 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2640 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2641 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2642 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2644 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2648 talloc_free(data.dptr);
2655 perform tickle updates if required
/*
 * Timer handler for the periodic tickle update.
 *
 * private_data is the struct ctdb_context.  For every public address
 * that this node hosts (vnn->pnn == ctdb->pnn) and that has
 * tcp_update_needed set, the tickle list is sent with
 * ctdb_send_set_tcp_tickles_for_ip(); a failure is logged, and the
 * flag is cleared once the update has been sent.  The handler then
 * schedules itself again after tunable.tickle_update_interval seconds.
 */
2657 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2658 struct tevent_timer *te,
2659 struct timeval t, void *private_data)
2661 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2663 struct ctdb_vnn *vnn;
2665 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2666 /* we only send out updates for public addresses that
2669 if (ctdb->pnn != vnn->pnn) {
2672 /* We only send out the updates if we need to */
2673 if (!vnn->tcp_update_needed) {
2676 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2677 &vnn->public_address,
2680 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2681 ctdb_addr_to_str(&vnn->public_address)));
2684 ("Sent tickle update for public address %s\n",
2685 ctdb_addr_to_str(&vnn->public_address)));
2686 vnn->tcp_update_needed = false;
/* Re-arm: the timer is one-shot, so each run schedules the next */
2690 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2691 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2692 ctdb_update_tcp_tickles, ctdb);
2696 start periodic update of tcp tickles
2698 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2700 ctdb->tickle_update_context = talloc_new(ctdb);
2702 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2703 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2704 ctdb_update_tcp_tickles, ctdb);
/* State for one series of gratuitous ARPs: the daemon context and the
 * address being announced.  NOTE(review): the iface and count members
 * used by send_gratious_arp() are not visible in this listing. */
2710 struct control_gratious_arp {
2711 struct ctdb_context *ctdb;
2712 ctdb_sock_addr addr;
2718 send a control_gratuitous arp
/*
 * Timer handler that sends one gratuitous ARP.
 *
 * private_data is a struct control_gratious_arp.  The ARP is sent with
 * ctdb_sys_send_arp() and a failure is logged together with
 * strerror(errno).  Unless the repeat count has reached
 * CTDB_ARP_REPEAT, the handler schedules itself again after
 * CTDB_ARP_INTERVAL seconds, using the state itself as the timer's
 * talloc parent.
 */
2720 static void send_gratious_arp(struct tevent_context *ev,
2721 struct tevent_timer *te,
2722 struct timeval t, void *private_data)
2725 struct control_gratious_arp *arp = talloc_get_type(private_data,
2726 struct control_gratious_arp);
2728 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2730 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2731 arp->iface, strerror(errno)));
2736 if (arp->count == CTDB_ARP_REPEAT) {
2741 tevent_add_timer(arp->ctdb->ev, arp,
2742 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2743 send_gratious_arp, arp);
2750 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2752 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2753 struct control_gratious_arp *arp;
2755 /* verify the size of indata */
2756 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2757 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2758 (unsigned)indata.dsize,
2759 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2763 ( offsetof(struct ctdb_addr_info_old, iface)
2764 + gratious_arp->len ) ){
2766 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2767 "but should be %u bytes\n",
2768 (unsigned)indata.dsize,
2769 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2774 arp = talloc(ctdb, struct control_gratious_arp);
2775 CTDB_NO_MEMORY(ctdb, arp);
2778 arp->addr = gratious_arp->addr;
2779 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2780 CTDB_NO_MEMORY(ctdb, arp->iface);
2783 tevent_add_timer(arp->ctdb->ev, arp,
2784 timeval_zero(), send_gratious_arp, arp);
2789 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2791 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2794 /* verify the size of indata */
2795 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2796 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2800 ( offsetof(struct ctdb_addr_info_old, iface)
2803 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2804 "but should be %u bytes\n",
2805 (unsigned)indata.dsize,
2806 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2810 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2812 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2815 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/* Carries the pending control request across the asynchronous release
 * of an address that is being deleted. */
struct delete_ip_callback_state {
	struct ctdb_req_control_old *c;	/* request answered in delete_ip_callback() */
};
2827 called when releaseip event finishes for del_public_address
2829 static void delete_ip_callback(struct ctdb_context *ctdb,
2830 int32_t status, TDB_DATA data,
2831 const char *errormsg,
2834 struct delete_ip_callback_state *state =
2835 talloc_get_type(private_data, struct delete_ip_callback_state);
2837 /* If release failed then fail. */
2838 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2839 talloc_free(private_data);
2842 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2843 struct ctdb_req_control_old *c,
2844 TDB_DATA indata, bool *async_reply)
2846 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2847 struct ctdb_vnn *vnn;
2849 /* verify the size of indata */
2850 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2851 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2855 ( offsetof(struct ctdb_addr_info_old, iface)
2858 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2859 "but should be %u bytes\n",
2860 (unsigned)indata.dsize,
2861 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2865 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2867 /* walk over all public addresses until we find a match */
2868 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2869 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2870 if (vnn->pnn == ctdb->pnn) {
2871 struct delete_ip_callback_state *state;
2872 struct ctdb_public_ip *ip;
2876 vnn->delete_pending = true;
2878 state = talloc(ctdb,
2879 struct delete_ip_callback_state);
2880 CTDB_NO_MEMORY(ctdb, state);
2883 ip = talloc(state, struct ctdb_public_ip);
2886 (__location__ " Out of memory\n"));
2891 ip->addr = pub->addr;
2893 data.dsize = sizeof(struct ctdb_public_ip);
2894 data.dptr = (unsigned char *)ip;
2896 ret = ctdb_daemon_send_control(ctdb,
2899 CTDB_CONTROL_RELEASE_IP,
2906 (__location__ "Unable to send "
2907 "CTDB_CONTROL_RELEASE_IP\n"));
2912 state->c = talloc_steal(state, c);
2913 *async_reply = true;
2915 /* This IP is not hosted on the
2916 * current node so just delete it
2918 do_delete_ip(ctdb, vnn);
2925 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2926 ctdb_addr_to_str(&pub->addr)));
/* Carries the pending control request until the "ipreallocated" event
 * script has finished. */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* request answered in ctdb_ipreallocated_callback() */
};
2935 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2936 int status, void *p)
2938 struct ipreallocated_callback_state *state =
2939 talloc_get_type(p, struct ipreallocated_callback_state);
2943 (" \"ipreallocated\" event script failed (status %d)\n",
2945 if (status == -ETIME) {
2946 ctdb_ban_self(ctdb);
2950 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2954 /* A control to run the ipreallocated event */
2955 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2956 struct ctdb_req_control_old *c,
2960 struct ipreallocated_callback_state *state;
2962 state = talloc(ctdb, struct ipreallocated_callback_state);
2963 CTDB_NO_MEMORY(ctdb, state);
2965 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2967 ret = ctdb_event_script_callback(ctdb, state,
2968 ctdb_ipreallocated_callback, state,
2969 CTDB_EVENT_IPREALLOCATED,
2973 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2978 /* tell the control that we will be reply asynchronously */
2979 state->c = talloc_steal(state, c);
2980 *async_reply = true;
2986 /* This function is called from the recovery daemon to verify that a remote
2987 node has the expected ip allocation.
2988 This is verified against ctdb->ip_tree
2990 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
2991 struct ctdb_public_ip_list *ips,
2994 struct public_ip_list *tmp_ip;
2997 if (ctdb->ip_tree == NULL) {
2998 /* don't know the expected allocation yet, assume remote node
3007 for (i=0; i<ips->num; i++) {
3008 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3009 if (tmp_ip == NULL) {
3010 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3014 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3018 if (tmp_ip->pnn != ips->ip[i].pnn) {
3020 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3022 ctdb_addr_to_str(&ips->ip[i].addr),
3023 ips->ip[i].pnn, tmp_ip->pnn));
3031 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3033 struct public_ip_list *tmp_ip;
3035 /* IP tree is never built if DisableIPFailover is set */
3036 if (ctdb->tunable.disable_ip_failover != 0) {
3040 if (ctdb->ip_tree == NULL) {
3041 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3045 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3046 if (tmp_ip == NULL) {
3047 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3051 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3052 tmp_ip->pnn = ip->pnn;
3057 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3059 TALLOC_FREE(ctdb->ip_tree);
/* Tracks one in-flight "reload public IPs" request and the child process
 * doing the work. */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* daemon context */
	struct ctdb_req_control_old *c;	/* request to answer when the handle is destroyed */
	int status;			/* status sent back in the reply */
	int fd[2];			/* pipe: child writes its result, parent reads fd[0] */
	pid_t child;			/* pid of the forked worker */
	struct tevent_fd *fde;		/* read event on fd[0] */
};
3071 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3073 if (h == h->ctdb->reload_ips) {
3074 h->ctdb->reload_ips = NULL;
3077 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3080 ctdb_kill(h->ctdb, h->child, SIGKILL);
3084 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3085 struct tevent_timer *te,
3086 struct timeval t, void *private_data)
3088 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3093 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3094 struct tevent_fd *fde,
3095 uint16_t flags, void *private_data)
3097 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3102 ret = sys_read(h->fd[0], &res, 1);
3103 if (ret < 1 || res != 0) {
3104 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3112 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3114 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3115 struct ctdb_public_ip_list_old *ips;
3116 struct ctdb_vnn *vnn;
3117 struct client_async_data *async_data;
3118 struct timeval timeout;
3120 struct ctdb_client_control_state *state;
3124 CTDB_NO_MEMORY(ctdb, mem_ctx);
3126 /* Read IPs from local node */
3127 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3128 CTDB_CURRENT_NODE, mem_ctx, &ips);
3131 ("Unable to fetch public IPs from local node\n"));
3132 talloc_free(mem_ctx);
3136 /* Read IPs file - this is safe since this is a child process */
3138 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3139 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3140 talloc_free(mem_ctx);
3144 async_data = talloc_zero(mem_ctx, struct client_async_data);
3145 CTDB_NO_MEMORY(ctdb, async_data);
3147 /* Compare IPs between node and file for IPs to be deleted */
3148 for (i = 0; i < ips->num; i++) {
3150 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3151 if (ctdb_same_ip(&vnn->public_address,
3152 &ips->ips[i].addr)) {
3153 /* IP is still in file */
3159 /* Delete IP ips->ips[i] */
3160 struct ctdb_addr_info_old *pub;
3163 ("IP %s no longer configured, deleting it\n",
3164 ctdb_addr_to_str(&ips->ips[i].addr)));
3166 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3167 CTDB_NO_MEMORY(ctdb, pub);
3169 pub->addr = ips->ips[i].addr;
3173 timeout = TAKEOVER_TIMEOUT();
3175 data.dsize = offsetof(struct ctdb_addr_info_old,
3177 data.dptr = (uint8_t *)pub;
3179 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3180 CTDB_CONTROL_DEL_PUBLIC_IP,
3181 0, data, async_data,
3183 if (state == NULL) {
3186 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3190 ctdb_client_async_add(async_data, state);
3194 /* Compare IPs between node and file for IPs to be added */
3196 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3197 for (i = 0; i < ips->num; i++) {
3198 if (ctdb_same_ip(&vnn->public_address,
3199 &ips->ips[i].addr)) {
3200 /* IP already on node */
3204 if (i == ips->num) {
3205 /* Add IP ips->ips[i] */
3206 struct ctdb_addr_info_old *pub;
3207 const char *ifaces = NULL;
3212 ("New IP %s configured, adding it\n",
3213 ctdb_addr_to_str(&vnn->public_address)));
3215 uint32_t pnn = ctdb_get_pnn(ctdb);
3217 data.dsize = sizeof(pnn);
3218 data.dptr = (uint8_t *)&pnn;
3220 ret = ctdb_client_send_message(
3222 CTDB_BROADCAST_CONNECTED,
3223 CTDB_SRVID_REBALANCE_NODE,
3226 DEBUG(DEBUG_WARNING,
3227 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3233 ifaces = vnn->ifaces[0];
3235 while (vnn->ifaces[iface] != NULL) {
3236 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3237 vnn->ifaces[iface]);
3241 len = strlen(ifaces) + 1;
3242 pub = talloc_zero_size(mem_ctx,
3243 offsetof(struct ctdb_addr_info_old, iface) + len);
3244 CTDB_NO_MEMORY(ctdb, pub);
3246 pub->addr = vnn->public_address;
3247 pub->mask = vnn->public_netmask_bits;
3249 memcpy(&pub->iface[0], ifaces, pub->len);
3251 timeout = TAKEOVER_TIMEOUT();
3253 data.dsize = offsetof(struct ctdb_addr_info_old,
3255 data.dptr = (uint8_t *)pub;
3257 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3258 CTDB_CONTROL_ADD_PUBLIC_IP,
3259 0, data, async_data,
3261 if (state == NULL) {
3264 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3268 ctdb_client_async_add(async_data, state);
3272 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3273 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3277 talloc_free(mem_ctx);
3281 talloc_free(mem_ctx);
/* This control is sent to force the node to re-read the public addresses file,
   drop any addresses we should no longer host, and add new addresses
   that we are now able to host.
*/
3289 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3291 struct ctdb_reloadips_handle *h;
3292 pid_t parent = getpid();
3294 if (ctdb->reload_ips != NULL) {
3295 talloc_free(ctdb->reload_ips);
3296 ctdb->reload_ips = NULL;
3299 h = talloc(ctdb, struct ctdb_reloadips_handle);
3300 CTDB_NO_MEMORY(ctdb, h);
3305 if (pipe(h->fd) == -1) {
3306 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3311 h->child = ctdb_fork(ctdb);
3312 if (h->child == (pid_t)-1) {
3313 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3321 if (h->child == 0) {
3322 signed char res = 0;
3325 debug_extra = talloc_asprintf(NULL, "reloadips:");
3327 prctl_set_comment("ctdb_reloadips");
3328 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3329 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3332 res = ctdb_reloadips_child(ctdb);
3334 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3338 sys_write(h->fd[1], &res, 1);
3339 ctdb_wait_for_process_to_exit(parent);
3343 h->c = talloc_steal(h, c);
3346 set_close_on_exec(h->fd[0]);
3348 talloc_set_destructor(h, ctdb_reloadips_destructor);
3351 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3352 ctdb_reloadips_child_handler, (void *)h);
3353 tevent_fd_set_auto_close(h->fde);
3355 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3356 ctdb_reloadips_timeout_event, h);
3358 /* we reply later */
3359 *async_reply = true;