4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 if (strlen(iface) > CTDB_IFACE_SIZE) {
72 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
76 /* Verify that we don't have an entry for this interface yet */
77 for (i=ctdb->ifaces;i;i=i->next) {
78 if (strcmp(i->name, iface) == 0) {
83 /* create a new structure for this interface */
84 i = talloc_zero(ctdb, struct ctdb_interface);
85 CTDB_NO_MEMORY_FATAL(ctdb, i);
86 i->name = talloc_strdup(i, iface);
87 CTDB_NO_MEMORY(ctdb, i->name);
91 DLIST_ADD(ctdb->ifaces, i);
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
101 for (n = 0; vnn->ifaces[n] != NULL; n++) {
102 if (strcmp(name, vnn->ifaces[n]) == 0) {
110 /* If any interfaces now have no possible IPs then delete them. This
111 * implementation is naive (i.e. simple) rather than clever
112 * (i.e. complex). Given that this is run on delip and that operation
113 * is rare, this doesn't need to be efficient - it needs to be
114 * foolproof. One alternative is reference counting, where the logic
115 * is distributed and can, therefore, be broken in multiple places.
116 * Another alternative is to build a red-black tree of interfaces that
117 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
118 * once) and then walking ctdb->ifaces once and deleting those not in
119 * the tree. Let's go to one of those if the naive implementation
120 * causes problems... :-)
122 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_interface *i, *next;
127 /* For each interface, check if there's an IP using it. */
128 for (i = ctdb->ifaces; i != NULL; i = next) {
133 /* Only consider interfaces named in the given VNN. */
134 if (!vnn_has_interface_with_name(vnn, i->name)) {
138 /* Is the "single IP" on this interface? */
139 if ((ctdb->single_ip_vnn != NULL) &&
140 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
141 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
142 /* Found, next interface please... */
145 /* Search for a vnn with this interface. */
147 for (tv=ctdb->vnn; tv; tv=tv->next) {
148 if (vnn_has_interface_with_name(tv, i->name)) {
155 /* None of the VNNs are using this interface. */
156 DLIST_REMOVE(ctdb->ifaces, i);
163 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
166 struct ctdb_interface *i;
168 for (i=ctdb->ifaces;i;i=i->next) {
169 if (strcmp(i->name, iface) == 0) {
177 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
178 struct ctdb_vnn *vnn)
181 struct ctdb_interface *cur = NULL;
182 struct ctdb_interface *best = NULL;
184 for (i=0; vnn->ifaces[i]; i++) {
186 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
200 if (cur->references < best->references) {
209 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
210 struct ctdb_vnn *vnn)
212 struct ctdb_interface *best = NULL;
215 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
216 "still assigned to iface '%s'\n",
217 ctdb_addr_to_str(&vnn->public_address),
218 ctdb_vnn_iface_string(vnn)));
222 best = ctdb_vnn_best_iface(ctdb, vnn);
224 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
225 "cannot assign to iface any iface\n",
226 ctdb_addr_to_str(&vnn->public_address)));
232 vnn->pnn = ctdb->pnn;
234 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
235 "now assigned to iface '%s' refs[%d]\n",
236 ctdb_addr_to_str(&vnn->public_address),
237 ctdb_vnn_iface_string(vnn),
242 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
243 struct ctdb_vnn *vnn)
245 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
246 "now unassigned (old iface '%s' refs[%d])\n",
247 ctdb_addr_to_str(&vnn->public_address),
248 ctdb_vnn_iface_string(vnn),
249 vnn->iface?vnn->iface->references:0));
251 vnn->iface->references--;
254 if (vnn->pnn == ctdb->pnn) {
259 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
260 struct ctdb_vnn *vnn)
264 /* Nodes that are not RUNNING can not host IPs */
265 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
269 if (vnn->delete_pending) {
273 if (vnn->iface && vnn->iface->link_up) {
277 for (i=0; vnn->ifaces[i]; i++) {
278 struct ctdb_interface *cur;
280 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
293 struct ctdb_takeover_arp {
294 struct ctdb_context *ctdb;
297 struct ctdb_tcp_array *tcparray;
298 struct ctdb_vnn *vnn;
303 lists of tcp endpoints
305 struct ctdb_tcp_list {
306 struct ctdb_tcp_list *prev, *next;
307 struct ctdb_connection connection;
311 list of clients to kill on IP release
313 struct ctdb_client_ip {
314 struct ctdb_client_ip *prev, *next;
315 struct ctdb_context *ctdb;
322 send a gratuitous arp
324 static void ctdb_control_send_arp(struct tevent_context *ev,
325 struct tevent_timer *te,
326 struct timeval t, void *private_data)
328 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
329 struct ctdb_takeover_arp);
331 struct ctdb_tcp_array *tcparray;
332 const char *iface = ctdb_vnn_iface_string(arp->vnn);
334 ret = ctdb_sys_send_arp(&arp->addr, iface);
336 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
337 iface, strerror(errno)));
340 tcparray = arp->tcparray;
342 for (i=0;i<tcparray->num;i++) {
343 struct ctdb_connection *tcon;
345 tcon = &tcparray->connections[i];
346 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
347 (unsigned)ntohs(tcon->dst.ip.sin_port),
348 ctdb_addr_to_str(&tcon->src),
349 (unsigned)ntohs(tcon->src.ip.sin_port)));
350 ret = ctdb_sys_send_tcp(
355 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
356 ctdb_addr_to_str(&tcon->src)));
363 if (arp->count == CTDB_ARP_REPEAT) {
368 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
369 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
370 ctdb_control_send_arp, arp);
373 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
374 struct ctdb_vnn *vnn)
376 struct ctdb_takeover_arp *arp;
377 struct ctdb_tcp_array *tcparray;
379 if (!vnn->takeover_ctx) {
380 vnn->takeover_ctx = talloc_new(vnn);
381 if (!vnn->takeover_ctx) {
386 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
392 arp->addr = vnn->public_address;
395 tcparray = vnn->tcp_array;
397 /* add all of the known tcp connections for this IP to the
398 list of tcp connections to send tickle acks for */
399 arp->tcparray = talloc_steal(arp, tcparray);
401 vnn->tcp_array = NULL;
402 vnn->tcp_update_needed = true;
405 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
406 timeval_zero(), ctdb_control_send_arp, arp);
411 struct takeover_callback_state {
412 struct ctdb_req_control_old *c;
413 ctdb_sock_addr *addr;
414 struct ctdb_vnn *vnn;
417 struct ctdb_do_takeip_state {
418 struct ctdb_req_control_old *c;
419 struct ctdb_vnn *vnn;
423 called when takeip event finishes
425 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
428 struct ctdb_do_takeip_state *state =
429 talloc_get_type(private_data, struct ctdb_do_takeip_state);
434 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
436 if (status == -ETIME) {
439 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
440 ctdb_addr_to_str(&state->vnn->public_address),
441 ctdb_vnn_iface_string(state->vnn)));
442 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
444 node->flags |= NODE_FLAGS_UNHEALTHY;
449 if (ctdb->do_checkpublicip) {
451 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
453 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
460 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
461 data.dsize = strlen((char *)data.dptr) + 1;
462 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
464 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
467 /* the control succeeded */
468 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
473 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
475 state->vnn->update_in_flight = false;
480 take over an ip address
482 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
483 struct ctdb_req_control_old *c,
484 struct ctdb_vnn *vnn)
487 struct ctdb_do_takeip_state *state;
489 if (vnn->update_in_flight) {
490 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
491 "update for this IP already in flight\n",
492 ctdb_addr_to_str(&vnn->public_address),
493 vnn->public_netmask_bits));
497 ret = ctdb_vnn_assign_iface(ctdb, vnn);
499 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
500 "assign a usable interface\n",
501 ctdb_addr_to_str(&vnn->public_address),
502 vnn->public_netmask_bits));
506 state = talloc(vnn, struct ctdb_do_takeip_state);
507 CTDB_NO_MEMORY(ctdb, state);
509 state->c = talloc_steal(ctdb, c);
512 vnn->update_in_flight = true;
513 talloc_set_destructor(state, ctdb_takeip_destructor);
515 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits,
518 ctdb_vnn_iface_string(vnn)));
520 ret = ctdb_event_script_callback(ctdb,
522 ctdb_do_takeip_callback,
526 ctdb_vnn_iface_string(vnn),
527 ctdb_addr_to_str(&vnn->public_address),
528 vnn->public_netmask_bits);
531 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
532 ctdb_addr_to_str(&vnn->public_address),
533 ctdb_vnn_iface_string(vnn)));
541 struct ctdb_do_updateip_state {
542 struct ctdb_req_control_old *c;
543 struct ctdb_interface *old;
544 struct ctdb_vnn *vnn;
548 called when updateip event finishes
550 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
553 struct ctdb_do_updateip_state *state =
554 talloc_get_type(private_data, struct ctdb_do_updateip_state);
558 if (status == -ETIME) {
561 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
562 ctdb_addr_to_str(&state->vnn->public_address),
564 ctdb_vnn_iface_string(state->vnn)));
567 * All we can do is reset the old interface
568 * and let the next run fix it
570 ctdb_vnn_unassign_iface(ctdb, state->vnn);
571 state->vnn->iface = state->old;
572 state->vnn->iface->references++;
574 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
579 if (ctdb->do_checkpublicip) {
581 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
583 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
590 /* the control succeeded */
591 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
596 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
598 state->vnn->update_in_flight = false;
603 update (move) an ip address
605 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
606 struct ctdb_req_control_old *c,
607 struct ctdb_vnn *vnn)
610 struct ctdb_do_updateip_state *state;
611 struct ctdb_interface *old = vnn->iface;
612 const char *new_name;
614 if (vnn->update_in_flight) {
615 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
616 "update for this IP already in flight\n",
617 ctdb_addr_to_str(&vnn->public_address),
618 vnn->public_netmask_bits));
622 ctdb_vnn_unassign_iface(ctdb, vnn);
623 ret = ctdb_vnn_assign_iface(ctdb, vnn);
625 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
626 "assin a usable interface (old iface '%s')\n",
627 ctdb_addr_to_str(&vnn->public_address),
628 vnn->public_netmask_bits,
633 new_name = ctdb_vnn_iface_string(vnn);
634 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
635 /* A benign update from one interface onto itself.
636 * no need to run the eventscripts in this case, just return
639 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
643 state = talloc(vnn, struct ctdb_do_updateip_state);
644 CTDB_NO_MEMORY(ctdb, state);
646 state->c = talloc_steal(ctdb, c);
650 vnn->update_in_flight = true;
651 talloc_set_destructor(state, ctdb_updateip_destructor);
653 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
654 "interface %s to %s\n",
655 ctdb_addr_to_str(&vnn->public_address),
656 vnn->public_netmask_bits,
660 ret = ctdb_event_script_callback(ctdb,
662 ctdb_do_updateip_callback,
664 CTDB_EVENT_UPDATE_IP,
668 ctdb_addr_to_str(&vnn->public_address),
669 vnn->public_netmask_bits);
671 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
672 ctdb_addr_to_str(&vnn->public_address),
673 old->name, new_name));
682 Find the vnn whose public address matches the given ip address
683 returns -1 if the address is not known as a public address
685 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
687 struct ctdb_vnn *vnn;
689 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
690 if (ctdb_same_ip(&vnn->public_address, addr)) {
699 take over an ip address
701 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
702 struct ctdb_req_control_old *c,
707 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
708 struct ctdb_vnn *vnn;
709 bool have_ip = false;
710 bool do_updateip = false;
711 bool do_takeip = false;
712 struct ctdb_interface *best_iface = NULL;
714 if (pip->pnn != ctdb->pnn) {
715 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
716 "with pnn %d, but we're node %d\n",
717 ctdb_addr_to_str(&pip->addr),
718 pip->pnn, ctdb->pnn));
722 /* look up the vnn for this public address */
723 vnn = find_public_ip_vnn(ctdb, &pip->addr);
725 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
726 ctdb_addr_to_str(&pip->addr)));
730 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
731 have_ip = ctdb_sys_have_ip(&pip->addr);
733 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
734 if (best_iface == NULL) {
735 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
736 "a usable interface (old %s, have_ip %d)\n",
737 ctdb_addr_to_str(&vnn->public_address),
738 vnn->public_netmask_bits,
739 ctdb_vnn_iface_string(vnn),
744 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
745 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
750 if (vnn->iface == NULL && have_ip) {
751 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
752 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
753 ctdb_addr_to_str(&vnn->public_address)));
757 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
758 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759 "and we have it on iface[%s], but it was assigned to node %d"
760 "and we are node %d, banning ourself\n",
761 ctdb_addr_to_str(&vnn->public_address),
762 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
767 if (vnn->pnn == -1 && have_ip) {
768 vnn->pnn = ctdb->pnn;
769 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
770 "and we already have it on iface[%s], update local daemon\n",
771 ctdb_addr_to_str(&vnn->public_address),
772 ctdb_vnn_iface_string(vnn)));
777 if (vnn->iface != best_iface) {
778 if (!vnn->iface->link_up) {
780 } else if (vnn->iface->references > (best_iface->references + 1)) {
781 /* only move when the rebalance gains something */
789 ctdb_vnn_unassign_iface(ctdb, vnn);
796 ret = ctdb_do_takeip(ctdb, c, vnn);
800 } else if (do_updateip) {
801 ret = ctdb_do_updateip(ctdb, c, vnn);
807 * The interface is up and the kernel knows the ip
810 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
811 ctdb_addr_to_str(&pip->addr),
812 vnn->public_netmask_bits,
813 ctdb_vnn_iface_string(vnn)));
817 /* tell ctdb_control.c that we will be replying asynchronously */
823 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
825 DLIST_REMOVE(ctdb->vnn, vnn);
826 ctdb_vnn_unassign_iface(ctdb, vnn);
827 ctdb_remove_orphaned_ifaces(ctdb, vnn);
832 called when releaseip event finishes
834 static void release_ip_callback(struct ctdb_context *ctdb, int status,
837 struct takeover_callback_state *state =
838 talloc_get_type(private_data, struct takeover_callback_state);
841 if (status == -ETIME) {
845 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
846 if (ctdb_sys_have_ip(state->addr)) {
848 ("IP %s still hosted during release IP callback, failing\n",
849 ctdb_addr_to_str(state->addr)));
850 ctdb_request_control_reply(ctdb, state->c,
857 /* send a message to all clients of this node telling them
858 that the cluster has been reconfigured and they should
859 release any sockets on this IP */
860 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
861 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
862 data.dsize = strlen((char *)data.dptr)+1;
864 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
866 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
868 ctdb_vnn_unassign_iface(ctdb, state->vnn);
870 /* Process the IP if it has been marked for deletion */
871 if (state->vnn->delete_pending) {
872 do_delete_ip(ctdb, state->vnn);
876 /* the control succeeded */
877 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
881 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
883 if (state->vnn != NULL) {
884 state->vnn->update_in_flight = false;
890 release an ip address
892 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
893 struct ctdb_req_control_old *c,
898 struct takeover_callback_state *state;
899 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
900 struct ctdb_vnn *vnn;
903 /* look up the vnn for this public address */
904 vnn = find_public_ip_vnn(ctdb, &pip->addr);
906 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
907 ctdb_addr_to_str(&pip->addr)));
912 /* stop any previous arps */
913 talloc_free(vnn->takeover_ctx);
914 vnn->takeover_ctx = NULL;
916 /* Some ctdb tool commands (e.g. moveip) send
917 * lazy multicast to drop an IP from any node that isn't the
918 * intended new node. The following makes ctdbd ignore
919 * a release for any address it doesn't host.
921 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
922 if (!ctdb_sys_have_ip(&pip->addr)) {
923 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
924 ctdb_addr_to_str(&pip->addr),
925 vnn->public_netmask_bits,
926 ctdb_vnn_iface_string(vnn)));
927 ctdb_vnn_unassign_iface(ctdb, vnn);
931 if (vnn->iface == NULL) {
932 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
933 ctdb_addr_to_str(&pip->addr),
934 vnn->public_netmask_bits));
939 /* There is a potential race between take_ip and us because we
940 * update the VNN via a callback that runs when the
941 * eventscripts have been run. Avoid the race by allowing one
942 * update to be in flight at a time.
944 if (vnn->update_in_flight) {
945 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
946 "update for this IP already in flight\n",
947 ctdb_addr_to_str(&vnn->public_address),
948 vnn->public_netmask_bits));
952 iface = strdup(ctdb_vnn_iface_string(vnn));
954 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
955 ctdb_addr_to_str(&pip->addr),
956 vnn->public_netmask_bits,
960 state = talloc(ctdb, struct takeover_callback_state);
962 ctdb_set_error(ctdb, "Out of memory at %s:%d",
968 state->c = talloc_steal(state, c);
969 state->addr = talloc(state, ctdb_sock_addr);
970 if (state->addr == NULL) {
971 ctdb_set_error(ctdb, "Out of memory at %s:%d",
977 *state->addr = pip->addr;
980 vnn->update_in_flight = true;
981 talloc_set_destructor(state, ctdb_releaseip_destructor);
983 ret = ctdb_event_script_callback(ctdb,
984 state, release_ip_callback, state,
985 CTDB_EVENT_RELEASE_IP,
988 ctdb_addr_to_str(&pip->addr),
989 vnn->public_netmask_bits);
992 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
993 ctdb_addr_to_str(&pip->addr),
994 ctdb_vnn_iface_string(vnn)));
999 /* tell the control that we will reply asynchronously */
1000 *async_reply = true;
1004 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1005 ctdb_sock_addr *addr,
1006 unsigned mask, const char *ifaces,
1009 struct ctdb_vnn *vnn;
1016 tmp = strdup(ifaces);
1017 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1018 if (!ctdb_sys_check_iface_exists(iface)) {
1019 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1026 /* Verify that we don't have an entry for this ip yet */
1027 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1028 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1029 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1030 ctdb_addr_to_str(addr)));
1035 /* create a new vnn structure for this ip address */
1036 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1037 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1038 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1039 tmp = talloc_strdup(vnn, ifaces);
1040 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1041 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1042 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1043 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1044 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1045 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1049 vnn->ifaces[num] = NULL;
1050 vnn->public_address = *addr;
1051 vnn->public_netmask_bits = mask;
1053 if (check_address) {
1054 if (ctdb_sys_have_ip(addr)) {
1055 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1056 vnn->pnn = ctdb->pnn;
1060 for (i=0; vnn->ifaces[i]; i++) {
1061 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1063 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1064 "for public_address[%s]\n",
1065 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1071 DLIST_ADD(ctdb->vnn, vnn);
1077 setup the public address lists from a file
1079 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1085 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1086 if (lines == NULL) {
1087 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1090 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1094 for (i=0;i<nlines;i++) {
1096 ctdb_sock_addr addr;
1097 const char *addrstr;
1102 while ((*line == ' ') || (*line == '\t')) {
1108 if (strcmp(line, "") == 0) {
1111 tok = strtok(line, " \t");
1113 tok = strtok(NULL, " \t");
1115 if (NULL == ctdb->default_public_interface) {
1116 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1121 ifaces = ctdb->default_public_interface;
1126 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1127 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1131 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1132 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1143 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1147 struct ctdb_vnn *svnn;
1148 struct ctdb_interface *cur = NULL;
1152 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1153 CTDB_NO_MEMORY(ctdb, svnn);
1155 svnn->ifaces = talloc_array(svnn, const char *, 2);
1156 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1157 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1158 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1159 svnn->ifaces[1] = NULL;
1161 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1167 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1169 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1170 "for single_ip[%s]\n",
1172 ctdb_addr_to_str(&svnn->public_address)));
1177 /* assume the single public ip interface is initially "good" */
1178 cur = ctdb_find_iface(ctdb, iface);
1180 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1183 cur->link_up = true;
1185 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1191 ctdb->single_ip_vnn = svnn;
1195 static void *add_ip_callback(void *parm, void *data)
1197 struct public_ip_list *this_ip = parm;
1198 struct public_ip_list *prev_ip = data;
1200 if (prev_ip == NULL) {
1203 if (this_ip->pnn == -1) {
1204 this_ip->pnn = prev_ip->pnn;
1210 static int getips_count_callback(void *param, void *data)
1212 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1213 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1215 new_ip->next = *ip_list;
1220 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1221 struct ctdb_public_ip_list *ips,
1224 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1225 struct ipalloc_state *ipalloc_state,
1226 struct ctdb_node_map_old *nodemap)
1230 struct ctdb_public_ip_list_old *ip_list;
1232 if (ipalloc_state->num != nodemap->num) {
1235 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1236 ipalloc_state->num, nodemap->num));
1240 for (j=0; j<nodemap->num; j++) {
1241 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1245 /* Retrieve the list of known public IPs from the node */
1246 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1249 ipalloc_state->known_public_ips,
1254 ("Failed to read known public IPs from node: %u\n",
1258 ipalloc_state->known_public_ips[j].num = ip_list->num;
1259 /* This could be copied and freed. However, ip_list
1260 * is allocated off ipalloc_state->known_public_ips,
1261 * so this is a safe hack. This will go away in a
1262 * while anyway... */
1263 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1265 if (ctdb->do_checkpublicip) {
1266 verify_remote_ip_allocation(
1268 &ipalloc_state->known_public_ips[j],
1272 /* Retrieve the list of available public IPs from the node */
1273 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1276 ipalloc_state->available_public_ips,
1277 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1281 ("Failed to read available public IPs from node: %u\n",
1285 ipalloc_state->available_public_ips[j].num = ip_list->num;
1286 /* This could be copied and freed. However, ip_list
1287 * is allocated off ipalloc_state->available_public_ips,
1288 * so this is a safe hack. This will go away in a
1289 * while anyway... */
1290 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1296 static struct public_ip_list *
1297 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1300 struct public_ip_list *ip_list;
1301 struct ctdb_public_ip_list *public_ips;
1303 TALLOC_FREE(ctdb->ip_tree);
1304 ctdb->ip_tree = trbt_create(ctdb, 0);
1306 for (i=0; i < ctdb->num_nodes; i++) {
1308 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1312 /* there were no public ips for this node */
1313 if (ipalloc_state->known_public_ips == NULL) {
1317 public_ips = &ipalloc_state->known_public_ips[i];
1319 for (j=0; j < public_ips->num; j++) {
1320 struct public_ip_list *tmp_ip;
1322 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1323 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1324 /* Do not use information about IP addresses hosted
1325 * on other nodes, it may not be accurate */
1326 if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1327 tmp_ip->pnn = public_ips->ip[j].pnn;
1331 tmp_ip->addr = public_ips->ip[j].addr;
1332 tmp_ip->next = NULL;
1334 trbt_insertarray32_callback(ctdb->ip_tree,
1335 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1342 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1347 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1351 for (i=0;i<nodemap->num;i++) {
1352 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1353 /* Found one completely healthy node */
1361 struct get_tunable_callback_data {
1362 const char *tunable;
1367 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1368 int32_t res, TDB_DATA outdata,
1371 struct get_tunable_callback_data *cd =
1372 (struct get_tunable_callback_data *)callback;
1376 /* Already handled in fail callback */
1380 if (outdata.dsize != sizeof(uint32_t)) {
1381 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1382 cd->tunable, pnn, (int)sizeof(uint32_t),
1383 (int)outdata.dsize));
1388 size = talloc_array_length(cd->out);
1390 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1391 cd->tunable, pnn, size));
1396 cd->out[pnn] = *(uint32_t *)outdata.dptr;
1399 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1400 int32_t res, TDB_DATA outdata,
1403 struct get_tunable_callback_data *cd =
1404 (struct get_tunable_callback_data *)callback;
1409 ("Timed out getting tunable \"%s\" from node %d\n",
1415 DEBUG(DEBUG_WARNING,
1416 ("Tunable \"%s\" not implemented on node %d\n",
1421 ("Unexpected error getting tunable \"%s\" from node %d\n",
1427 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1428 TALLOC_CTX *tmp_ctx,
1429 struct ctdb_node_map_old *nodemap,
1430 const char *tunable,
1431 uint32_t default_value)
1434 struct ctdb_control_get_tunable *t;
1437 struct get_tunable_callback_data callback_data;
1440 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1441 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1442 for (i=0; i<nodemap->num; i++) {
1443 tvals[i] = default_value;
1446 callback_data.out = tvals;
1447 callback_data.tunable = tunable;
1448 callback_data.fatal = false;
1450 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1451 data.dptr = talloc_size(tmp_ctx, data.dsize);
1452 t = (struct ctdb_control_get_tunable *)data.dptr;
1453 t->length = strlen(tunable)+1;
1454 memcpy(t->name, tunable, t->length);
1455 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1456 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1457 nodes, 0, TAKEOVER_TIMEOUT(),
1459 get_tunable_callback,
1460 get_tunable_fail_callback,
1461 &callback_data) != 0) {
1462 if (callback_data.fatal) {
1468 talloc_free(data.dptr);
1473 /* Set internal flags for IP allocation:
1475 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1476 * Set NOIPHOST ip flag for each INACTIVE node
1477 * if all nodes are disabled:
1478 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1480 * Set NOIPHOST ip flags for disabled nodes
// Translate per-node tunable values and node flags into the noiptakeover[]
// and noiphost[] arrays of ipalloc_state.
//
// ipalloc_state              - state whose flag arrays are written
// nodemap                    - node map supplying per-node flags
// tval_noiptakeover          - per-node values, indexed like nodemap->nodes
// tval_noiphostonalldisabled - per-node values, indexed like nodemap->nodes
//
// First pass: a non-zero tval_noiptakeover entry sets noiptakeover[i], and
// NODE_FLAGS_INACTIVE sets noiphost[i]. Second pass: when
// all_nodes_are_disabled() is true, noiphost[i] follows the
// NoIPHostOnAllDisabled values; the remaining loop sets noiphost[i] for
// nodes flagged NODE_FLAGS_DISABLED.
// Flags are only ever set to true here, never cleared.
1482 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1483 struct ctdb_node_map_old *nodemap,
1484 uint32_t *tval_noiptakeover,
1485 uint32_t *tval_noiphostonalldisabled)
1489 for (i=0;i<nodemap->num;i++) {
1490 /* Can not take IPs on node with NoIPTakeover set */
1491 if (tval_noiptakeover[i] != 0) {
1492 ipalloc_state->noiptakeover[i] = true;
1495 /* Can not host IPs on INACTIVE node */
1496 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1497 ipalloc_state->noiphost[i] = true;
1501 if (all_nodes_are_disabled(nodemap)) {
1502 /* If all nodes are disabled, can not host IPs on node
1503 * with NoIPHostOnAllDisabled set
1505 for (i=0;i<nodemap->num;i++) {
1506 if (tval_noiphostonalldisabled[i] != 0) {
1507 ipalloc_state->noiphost[i] = true;
1511 /* If some nodes are not disabled, then can not host
1512 * IPs on DISABLED node
1514 for (i=0;i<nodemap->num;i++) {
1515 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1516 ipalloc_state->noiphost[i] = true;
// Fetch two per-node tunables and apply them to ipalloc_state.
//
// Both value arrays are allocated on ipalloc_state by
// get_tunable_from_nodes, handed to set_ipflags_internal and then freed.
// A NULL array from either fetch is checked for; ctdb_takeover_run treats a
// false return from this function as a failure to set the IP flags.
// NOTE(review): the name of the first tunable is on a line missing from
// this listing - presumably NoIPTakeover, going by the variable name.
1522 static bool set_ipflags(struct ctdb_context *ctdb,
1523 struct ipalloc_state *ipalloc_state,
1524 struct ctdb_node_map_old *nodemap)
1526 uint32_t *tval_noiptakeover;
1527 uint32_t *tval_noiphostonalldisabled;
1529 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1531 if (tval_noiptakeover == NULL) {
1535 tval_noiphostonalldisabled =
1536 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1537 "NoIPHostOnAllDisabled", 0);
1538 if (tval_noiphostonalldisabled == NULL) {
1539 /* Caller frees tmp_ctx */
1543 set_ipflags_internal(ipalloc_state, nodemap,
1545 tval_noiphostonalldisabled);
1547 talloc_free(tval_noiptakeover);
1548 talloc_free(tval_noiphostonalldisabled);
// Allocate and initialise the state used by one IP allocation run.
//
// ctdb    - daemon context; supplies num_nodes and the tunables read below
// mem_ctx - talloc parent of the new state
//
// The state and its four per-node arrays (known_public_ips,
// available_public_ips, noiptakeover, noiphost) are zero-initialised and
// sized by ctdb->num_nodes; the arrays are talloc children of the state.
// Algorithm choice: LCP2 if lcp2_public_ip_assignment is 1, otherwise
// deterministic if deterministic_public_ips is 1, otherwise
// non-deterministic. no_ip_failback is copied from the local tunable.
// Returns the new state on success.
// NOTE(review): the talloc_free after the return looks like a shared
// out-of-memory exit, but its label and return are missing from this listing.
1553 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1554 TALLOC_CTX *mem_ctx)
1556 struct ipalloc_state *ipalloc_state =
1557 talloc_zero(mem_ctx, struct ipalloc_state);
1558 if (ipalloc_state == NULL) {
1559 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1563 ipalloc_state->num = ctdb->num_nodes;
1565 ipalloc_state->known_public_ips =
1566 talloc_zero_array(ipalloc_state,
1567 struct ctdb_public_ip_list,
1568 ipalloc_state->num);
1569 if (ipalloc_state->known_public_ips == NULL) {
1570 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1574 ipalloc_state->available_public_ips =
1575 talloc_zero_array(ipalloc_state,
1576 struct ctdb_public_ip_list,
1577 ipalloc_state->num);
1578 if (ipalloc_state->available_public_ips == NULL) {
1579 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1582 ipalloc_state->noiptakeover =
1583 talloc_zero_array(ipalloc_state,
1585 ipalloc_state->num);
1586 if (ipalloc_state->noiptakeover == NULL) {
1587 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1590 ipalloc_state->noiphost =
1591 talloc_zero_array(ipalloc_state,
1593 ipalloc_state->num);
1594 if (ipalloc_state->noiphost == NULL) {
1595 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1599 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1600 ipalloc_state->algorithm = IPALLOC_LCP2;
1601 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1602 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1604 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1607 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1609 return ipalloc_state;
1611 talloc_free(ipalloc_state);
// Context handed to iprealloc_fail_callback.
// fail_callback and fail_callback_data are the caller-supplied handler and
// its argument; nodemap is consulted for per-node flags.
// NOTE(review): further members (retry_nodes and retry_count are used
// elsewhere in this file) sit on lines missing from this listing.
1615 struct iprealloc_callback_data {
1618 client_async_callback fail_callback;
1619 void *fail_callback_data;
1620 struct ctdb_node_map_old *nodemap;
// Failure callback for the IPREALLOCATED control.
//
// callback is cast to struct iprealloc_callback_data. pnn is used directly
// as an index into cd->retry_nodes[] and cd->nodemap->nodes[], so it is
// range-checked against the length of retry_nodes first. Failures from
// nodes flagged NODE_FLAGS_INACTIVE are logged and ignored. For a timeout
// the caller-supplied fail_callback is invoked when one is registered; any
// other failure marks the node in retry_nodes[] for a later retry.
//
// Fix: valid indices are 0 .. numnodes-1, so a pnn equal to numnodes must
// be rejected as well. The old test (pnn > numnodes) let it through and
// indexed one element past the end of both arrays.
// NOTE(review): the lines choosing between the timeout and retry branches
// are missing from this listing.
1623 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1624 int32_t res, TDB_DATA outdata,
1628 struct iprealloc_callback_data *cd =
1629 (struct iprealloc_callback_data *)callback;
1631 numnodes = talloc_array_length(cd->retry_nodes);
1632 if (pnn >= numnodes) {
1634 ("ipreallocated failure from node %d, "
1635 "but only %d nodes in nodemap\n",
1640 /* Can't run the "ipreallocated" event on a INACTIVE node */
1641 if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1642 DEBUG(DEBUG_WARNING,
1643 ("ipreallocated failed on inactive node %d, ignoring\n",
1650 /* If the control timed out then that's a real error,
1651 * so call the real fail callback
1653 if (cd->fail_callback) {
1654 cd->fail_callback(ctdb, pnn, res, outdata,
1655 cd->fail_callback_data);
1657 DEBUG(DEBUG_WARNING,
1658 ("iprealloc timed out but no callback registered\n"));
1662 /* If not a timeout then either the ipreallocated
1663 * eventscript (or some setup) failed. This might
1664 * have failed because the IPREALLOCATED control isn't
1665 * implemented - right now there is no way of knowing
1666 * because the error codes are all folded down to -1.
1667 * Consider retrying using EVENTSCRIPT control...
1669 DEBUG(DEBUG_WARNING,
1670 ("ipreallocated failure from node %d, flagging retry\n",
1672 cd->retry_nodes[pnn] = true;
// Context handed to takeover_run_fail_callback.
// fail_callback and fail_callback_data are the caller-supplied handler and
// its argument; nodemap maps a failing PNN back to a node index.
// NOTE(review): a node_failed member is used elsewhere in this file but its
// declaration is on a line missing from this listing.
1677 struct takeover_callback_data {
1679 client_async_callback fail_callback;
1680 void *fail_callback_data;
1681 struct ctdb_node_map_old *nodemap;
// Failure callback that forwards to the caller-supplied handler only for
// the first failure seen from a node.
//
// callback_data must be a struct takeover_callback_data
// (talloc_get_type_abort enforces the type). The node map is searched for
// the entry whose pnn equals node_pnn; an unknown PNN is logged as invalid.
// If node_failed[i] is not yet set, it is set and cd->fail_callback is
// invoked with the original arguments.
1684 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1685 uint32_t node_pnn, int32_t res,
1686 TDB_DATA outdata, void *callback_data)
1688 struct takeover_callback_data *cd =
1689 talloc_get_type_abort(callback_data,
1690 struct takeover_callback_data);
1693 for (i = 0; i < cd->nodemap->num; i++) {
1694 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1699 if (i == cd->nodemap->num) {
1700 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1704 if (!cd->node_failed[i]) {
1705 cd->node_failed[i] = true;
1706 cd->fail_callback(ctdb, node_pnn, res, outdata,
1707 cd->fail_callback_data);
1712 * Recalculate the allocation of public IPs to nodes and have the
1713 * nodes host their allocated addresses.
1715 * - Allocate memory for IP allocation state, including per node
1717 * - Populate IP allocation algorithm in IP allocation state
1718 * - Populate local value of tunable NoIPFailback in IP allocation
1719 state - this is really a cluster-wide configuration variable and
1720 only the value from the master node is used
1721 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1722 * connected nodes - this is done separately so tunable values can
1723 * be faked in unit testing
1724 * - Populate NoIPTakeover tunable in IP allocation state
1725 * - Populate NoIPHost in IP allocation state, derived from node flags
1726 * and NoIPHostOnAllDisabled tunable
1727 * - Retrieve and populate known and available IP lists in IP
1729 * - If no available IP addresses then early exit
1730 * - Build list of (known IPs, currently assigned node)
1731 * - Populate list of nodes to force rebalance - internal structure,
1732 * currently no way to fetch, only used by LCP2 for nodes that have
1733 * had new IP addresses added
1734 * - Run IP allocation algorithm
1735 * - Send RELEASE_IP to all nodes for IPs they should not host
1736 * - Send TAKE_IP to all nodes for IPs they should host
1737 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
// Recompute the public IP layout and push it out to the cluster.
//
// ctdb                  - daemon context
// nodemap               - current node map
// force_rebalance_nodes - stored in the allocation state before ipalloc()
// fail_callback         - handler told about per-node control failures
// callback_data         - argument passed through to fail_callback
//
// Visible steps, in order:
//  - build per-run state on a temporary talloc context (tmp_ctx)
//  - set_ipflags() and ctdb_reload_remote_public_ips() fill that state
//  - check whether any node reports an available public IP
//  - merge the per-node IP lists and run ipalloc()
//  - send RELEASE_IP to connected nodes for IPs they should not host,
//    then TAKEOVER_IP to the chosen node for each assigned IP
//  - send IPREALLOCATED to connected nodes and retry the nodes flagged by
//    iprealloc_fail_callback using RUN_EVENTSCRIPTS
// NOTE(review): return statements, labels and several branch lines are
// missing from this listing, so the return value convention and the exact
// jump targets of the early exits are not visible here.
1739 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1740 uint32_t *force_rebalance_nodes,
1741 client_async_callback fail_callback, void *callback_data)
1744 struct ctdb_public_ip ip;
1746 struct public_ip_list *all_ips, *tmp_ip;
1748 struct timeval timeout;
1749 struct client_async_data *async_data;
1750 struct ctdb_client_control_state *state;
1751 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1752 struct ipalloc_state *ipalloc_state;
1753 struct takeover_callback_data *takeover_data;
1754 struct iprealloc_callback_data iprealloc_data;
1759 * ip failover is completely disabled, just send out the
1760 * ipreallocated event.
1762 if (ctdb->tunable.disable_ip_failover != 0) {
1766 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1767 if (ipalloc_state == NULL) {
1768 talloc_free(tmp_ctx);
1772 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1773 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1774 talloc_free(tmp_ctx);
1778 /* Fetch known/available public IPs from each active node */
1779 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1781 talloc_free(tmp_ctx);
1785 /* Short-circuit IP allocation if no node has available IPs */
1786 can_host_ips = false;
1787 for (i=0; i < ipalloc_state->num; i++) {
1788 if (ipalloc_state->available_public_ips[i].num != 0) {
1789 can_host_ips = true;
1792 if (!can_host_ips) {
1793 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1797 /* since nodes only know about those public addresses that
1798 can be served by that particular node, no single node has
1799 a full list of all public addresses that exist in the cluster.
1800 Walk over all node structures and create a merged list of
1801 all public addresses that exist in the cluster.
1803 keep the tree of ips around as ctdb->ip_tree
1805 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1806 ipalloc_state->all_ips = all_ips;
1808 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1810 /* Do the IP reassignment calculations */
1811 ipalloc(ipalloc_state);
1813 /* Now tell all nodes to release any public IPs should not
1814 * host. This will be a NOOP on nodes that don't currently
1815 * hold the given IP.
1817 takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1818 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1820 takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1821 bool, nodemap->num);
1822 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1823 takeover_data->fail_callback = fail_callback;
1824 takeover_data->fail_callback_data = callback_data;
1825 takeover_data->nodemap = nodemap;
1827 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1828 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
// RELEASE_IP failures go through the wrapper so that the caller hears
// about each node only once.
1830 async_data->fail_callback = takeover_run_fail_callback;
1831 async_data->callback_data = takeover_data;
1833 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1835 /* Send a RELEASE_IP to all nodes that should not be hosting
1836 * each IP. For each IP, all but one of these will be
1837 * redundant. However, the redundant ones are used to tell
1838 * nodes which node should be hosting the IP so that commands
1839 * like "ctdb ip" can display a particular nodes idea of who
1840 * is hosting what. */
1841 for (i=0;i<nodemap->num;i++) {
1842 /* don't talk to unconnected nodes, but do talk to banned nodes */
1843 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1847 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1848 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1849 /* This node should be serving this
1850 vnn so don't tell it to release the ip
1854 ip.pnn = tmp_ip->pnn;
1855 ip.addr = tmp_ip->addr;
1857 timeout = TAKEOVER_TIMEOUT();
1858 data.dsize = sizeof(ip);
1859 data.dptr = (uint8_t *)&ip;
1860 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1861 0, CTDB_CONTROL_RELEASE_IP, 0,
1864 if (state == NULL) {
1865 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1866 talloc_free(tmp_ctx);
1870 ctdb_client_async_add(async_data, state);
1873 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1874 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1875 talloc_free(tmp_ctx);
1878 talloc_free(async_data);
1881 /* For each IP, send a TAKEOVER_IP to the node that should be
1882 * hosting it. Many of these will often be redundant (since
1883 * the allocation won't have changed) but they can be useful
1884 * to recover from inconsistencies. */
1885 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1886 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
// TAKEOVER_IP failures are reported straight to the caller-supplied handler.
1888 async_data->fail_callback = fail_callback;
1889 async_data->callback_data = callback_data;
1891 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1892 if (tmp_ip->pnn == -1) {
1893 /* this IP won't be taken over */
1897 ip.pnn = tmp_ip->pnn;
1898 ip.addr = tmp_ip->addr;
1900 timeout = TAKEOVER_TIMEOUT();
1901 data.dsize = sizeof(ip);
1902 data.dptr = (uint8_t *)&ip;
1903 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1904 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1905 data, async_data, &timeout, NULL);
1906 if (state == NULL) {
1907 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1908 talloc_free(tmp_ctx);
1912 ctdb_client_async_add(async_data, state);
1914 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1915 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1916 talloc_free(tmp_ctx);
1922 * Tell all nodes to run eventscripts to process the
1923 * "ipreallocated" event. This can do a lot of things,
1924 * including restarting services to reconfigure them if public
1925 * IPs have moved. Once upon a time this event only used to
// One retry flag per node map entry; iprealloc_fail_callback sets them.
1928 retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1929 CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1930 iprealloc_data.retry_nodes = retry_data;
1931 iprealloc_data.retry_count = 0;
1932 iprealloc_data.fail_callback = fail_callback;
1933 iprealloc_data.fail_callback_data = callback_data;
1934 iprealloc_data.nodemap = nodemap;
1936 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1937 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1938 nodes, 0, TAKEOVER_TIMEOUT(),
1940 NULL, iprealloc_fail_callback,
1943 /* If the control failed then we should retry to any
1944 * nodes flagged by iprealloc_fail_callback using the
1945 * EVENTSCRIPT control. This is a best-effort at
1946 * backward compatiblity when running a mixed cluster
1947 * where some nodes have not yet been upgraded to
1948 * support the IPREALLOCATED control.
1950 DEBUG(DEBUG_WARNING,
1951 ("Retry ipreallocated to some nodes using eventscript control\n"));
1953 nodes = talloc_array(tmp_ctx, uint32_t,
1954 iprealloc_data.retry_count);
1955 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1958 for (i=0; i<nodemap->num; i++) {
1959 if (iprealloc_data.retry_nodes[i]) {
1965 data.dptr = discard_const("ipreallocated");
1966 data.dsize = strlen((char *)data.dptr) + 1;
1967 ret = ctdb_client_async_control(ctdb,
1968 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1969 nodes, 0, TAKEOVER_TIMEOUT(),
1971 NULL, fail_callback,
1974 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1978 talloc_free(tmp_ctx);
1984 destroy a ctdb_client_ip structure
// talloc destructor for struct ctdb_client_ip (installed by
// ctdb_control_tcp_client). Logs the address and port at debug level and
// unlinks the entry from ip->ctdb->client_ip_list.
1986 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1988 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1989 ctdb_addr_to_str(&ip->addr),
1990 ntohs(ip->addr.ip.sin_port),
1993 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1998 called by a client to inform us of a TCP connection that it is managing
1999 that should be tickled with an ACK when IP takeover is done
// Register a TCP connection owned by a local client and announce it to the
// connected nodes.
//
// ctdb      - daemon context
// client_id - request id used to look up the struct ctdb_client
// indata    - carries a struct ctdb_connection (src and dst addresses)
//
// Visible steps:
//  - nothing is registered when the node has no public IPs (ctdb->vnn NULL)
//  - src and dst are canonicalised in place via a temporary copy
//  - the dst address is looked up with find_public_ip_vnn; the first switch
//    logs addresses that are not public (an IPv4 loopback destination is
//    not logged)
//  - registration fails when the matching vnn is not held by this node
//  - a ctdb_client_ip and a ctdb_tcp_list entry are allocated as talloc
//    children of the client and linked into ctdb->client_ip_list and
//    client->tcp_list
//  - CTDB_CONTROL_TCP_ADD is broadcast to connected nodes without
//    requesting a reply
// NOTE(review): return statements, case labels and the check guarding the
// first switch are missing from this listing.
2001 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2004 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2005 struct ctdb_connection *tcp_sock = NULL;
2006 struct ctdb_tcp_list *tcp;
2007 struct ctdb_connection t;
2010 struct ctdb_client_ip *ip;
2011 struct ctdb_vnn *vnn;
2012 ctdb_sock_addr addr;
2014 /* If we don't have public IPs, tickles are useless */
2015 if (ctdb->vnn == NULL) {
2019 tcp_sock = (struct ctdb_connection *)indata.dptr;
// addr is a scratch copy so each address can be canonicalised in place.
2021 addr = tcp_sock->src;
2022 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2023 addr = tcp_sock->dst;
2024 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2027 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2028 vnn = find_public_ip_vnn(ctdb, &addr);
2030 switch (addr.sa.sa_family) {
2032 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2033 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2034 ctdb_addr_to_str(&addr)));
2038 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2039 ctdb_addr_to_str(&addr)));
2042 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2048 if (vnn->pnn != ctdb->pnn) {
2049 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2050 ctdb_addr_to_str(&addr),
2051 client_id, client->pid));
2052 /* failing this call will tell smbd to die */
2056 ip = talloc(client, struct ctdb_client_ip);
2057 CTDB_NO_MEMORY(ctdb, ip);
2061 ip->client_id = client_id;
2062 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2063 DLIST_ADD(ctdb->client_ip_list, ip);
2065 tcp = talloc(client, struct ctdb_tcp_list);
2066 CTDB_NO_MEMORY(ctdb, tcp);
2068 tcp->connection.src = tcp_sock->src;
2069 tcp->connection.dst = tcp_sock->dst;
2071 DLIST_ADD(client->tcp_list, tcp);
2073 t.src = tcp_sock->src;
2074 t.dst = tcp_sock->dst;
2076 data.dptr = (uint8_t *)&t;
2077 data.dsize = sizeof(t);
2079 switch (addr.sa.sa_family) {
2081 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2082 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2083 ctdb_addr_to_str(&tcp_sock->src),
2084 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2087 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2088 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2089 ctdb_addr_to_str(&tcp_sock->src),
2090 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2093 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2097 /* tell all nodes about this tcp connection */
2098 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2099 CTDB_CONTROL_TCP_ADD,
2100 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2102 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2110 find a tcp address on a list
// Linear search of a tickle array for a connection.
//
// array - tickle array to search; a NULL array is checked for up front
// tcp   - connection to look for
//
// An entry matches when both its src and dst compare equal under
// ctdb_same_sockaddr. Returns a pointer to the matching element inside
// array->connections.
// NOTE(review): the not-found return is on a line missing from this
// listing; callers in this file compare the result against NULL.
2112 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2113 struct ctdb_connection *tcp)
2117 if (array == NULL) {
2121 for (i=0;i<array->num;i++) {
2122 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2123 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2124 return &array->connections[i];
2133 called by a daemon to inform us of a TCP connection that one of its
2134 clients is managing that should be tickled with an ACK when IP takeover is
// Record a tickle (TCP connection) against the public address it targets.
//
// ctdb              - daemon context
// indata            - carries a struct ctdb_connection
// tcp_update_needed - when true, vnn->tcp_update_needed is set after adding
//
// Visible steps:
//  - nothing is recorded when the node has no public IPs
//  - the destination is looked up with find_public_ip_vnn; a destination
//    that is not a public address is only logged
//  - first tickle for the vnn: a ctdb_tcp_array is allocated as a talloc
//    child of the vnn with room for one connection
//  - otherwise a connection already present (per ctdb_tcp_find) is only
//    logged, and a new one is appended after growing the array
// NOTE(review): the grown array is assigned straight back from
// talloc_realloc; if that call fails the old pointer is overwritten while
// num still counts the old entries - confirm whether this needs a
// temporary. The size argument line is missing from this listing.
2137 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2139 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2140 struct ctdb_tcp_array *tcparray;
2141 struct ctdb_connection tcp;
2142 struct ctdb_vnn *vnn;
2144 /* If we don't have public IPs, tickles are useless */
2145 if (ctdb->vnn == NULL) {
2149 vnn = find_public_ip_vnn(ctdb, &p->dst);
2151 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2152 ctdb_addr_to_str(&p->dst)));
2158 tcparray = vnn->tcp_array;
2160 /* If this is the first tickle */
2161 if (tcparray == NULL) {
2162 tcparray = talloc(vnn, struct ctdb_tcp_array);
2163 CTDB_NO_MEMORY(ctdb, tcparray);
2164 vnn->tcp_array = tcparray;
2167 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2168 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2170 tcparray->connections[tcparray->num].src = p->src;
2171 tcparray->connections[tcparray->num].dst = p->dst;
2174 if (tcp_update_needed) {
2175 vnn->tcp_update_needed = true;
2181 /* Do we already have this tickle ?*/
2184 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2185 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2186 ctdb_addr_to_str(&tcp.dst),
2187 ntohs(tcp.dst.ip.sin_port),
2192 /* A new tickle, we must add it to the array */
2193 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2194 struct ctdb_connection,
2196 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2198 tcparray->connections[tcparray->num].src = p->src;
2199 tcparray->connections[tcparray->num].dst = p->dst;
2202 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2203 ctdb_addr_to_str(&tcp.dst),
2204 ntohs(tcp.dst.ip.sin_port),
2207 if (tcp_update_needed) {
2208 vnn->tcp_update_needed = true;
// Remove one connection from the tickle array of a vnn.
//
// vnn  - public address whose tickle array is edited
// conn - connection to remove
//
// A missing array or an unknown connection is only logged. Otherwise the
// last array element is copied over the matching slot and num is
// decremented, so element order is not preserved. When num reaches zero the
// array is freed and vnn->tcp_array is reset to NULL.
// vnn->tcp_update_needed is then set.
2215 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2217 struct ctdb_connection *tcpp;
2223 /* if the array is empty we cant remove it
2224 and we don't need to do anything
2226 if (vnn->tcp_array == NULL) {
2227 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2228 ctdb_addr_to_str(&conn->dst),
2229 ntohs(conn->dst.ip.sin_port)));
2234 /* See if we know this connection
2235 if we don't know this connection then we dont need to do anything
2237 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2239 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2240 ctdb_addr_to_str(&conn->dst),
2241 ntohs(conn->dst.ip.sin_port)));
2246 /* We need to remove this entry from the array.
2247 Instead of allocating a new array and copying data to it
2248 we cheat and just copy the last entry in the existing array
2249 to the entry that is to be removed and just shring the
2252 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2253 vnn->tcp_array->num--;
2255 /* If we deleted the last entry we also need to remove the entire array
2257 if (vnn->tcp_array->num == 0) {
2258 talloc_free(vnn->tcp_array);
2259 vnn->tcp_array = NULL;
2262 vnn->tcp_update_needed = true;
2264 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2265 ctdb_addr_to_str(&conn->src),
2266 ntohs(conn->src.ip.sin_port)));
2271 called by a daemon to inform us of a TCP connection that one of its
2272 clients used are no longer needed in the tickle database
// Control handler that drops one connection from the tickle database.
//
// ctdb   - daemon context
// indata - carries a struct ctdb_connection
//
// Nothing is done when the node has no public IPs. The destination address
// is looked up with find_public_ip_vnn; an unknown address is logged,
// otherwise ctdb_remove_connection is called on the matching vnn.
// NOTE(review): return statements are missing from this listing.
2274 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2276 struct ctdb_vnn *vnn;
2277 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2279 /* If we don't have public IPs, tickles are useless */
2280 if (ctdb->vnn == NULL) {
2284 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2287 (__location__ " unable to find public address %s\n",
2288 ctdb_addr_to_str(&conn->dst)));
2292 ctdb_remove_connection(vnn, conn);
2299 Called when another daemon starts - causes all tickles for all
2300 public addresses we are serving to be sent to the new node on the
2301 next check. This actually causes the next scheduled call to
2302 tdb_update_tcp_tickles() to update all nodes. This is simple and
2303 doesn't require careful error handling.
// Control handler run when another node announces that it has started.
//
// ctdb - daemon context
// pnn  - node number of the sender; only used for the log message
//
// Sets tcp_update_needed on every vnn in ctdb->vnn.
2305 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2307 struct ctdb_vnn *vnn;
2309 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2310 (unsigned long) pnn));
2312 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2313 vnn->tcp_update_needed = true;
2321 called when a client structure goes away - hook to remove
2322 elements from the tcp_list in all daemons
// Drain the tcp_list of a client that is going away.
//
// client - client whose registered connections are processed
//
// Each entry is unlinked from client->tcp_list and its destination is
// looked up with find_public_ip_vnn (an unknown address is logged). The
// connection is removed from the tickle array only when the vnn is
// currently assigned to this node (vnn->pnn equals the local pnn).
2324 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2326 while (client->tcp_list) {
2327 struct ctdb_vnn *vnn;
2328 struct ctdb_tcp_list *tcp = client->tcp_list;
2329 struct ctdb_connection *conn = &tcp->connection;
2331 DLIST_REMOVE(client->tcp_list, tcp);
2333 vnn = find_public_ip_vnn(client->ctdb,
2337 (__location__ " unable to find public address %s\n",
2338 ctdb_addr_to_str(&conn->dst)));
2342 /* If the IP address is hosted on this node then
2343 * remove the connection. */
2344 if (vnn->pnn == client->ctdb->pnn) {
2345 ctdb_remove_connection(vnn, conn);
2348 /* Otherwise this function has been called because the
2349 * server IP address has been released to another node
2350 * and the client has exited. This means that we
2351 * should not delete the connection information. The
2352 * takeover node processes connections too. */
// Release the public IPs configured on this node.
//
// ctdb - daemon context
//
// The tunable disable_ip_failover is checked first. For each vnn:
//  - if ctdb_sys_have_ip reports the address is not present, only the
//    interface assignment is dropped
//  - a vnn with update_in_flight set is logged and not released
//  - otherwise update_in_flight is set, the RELEASE_IP event script is run
//    with interface, address and netmask bits, a CTDB_SRVID_RELEASE_IP
//    message carrying the address string is sent to the local pnn, the
//    interface is unassigned and update_in_flight is cleared
// The number of released IPs is logged at the end.
// NOTE(review): the lines that skip to the next vnn and that update count
// are missing from this listing.
2357 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2359 struct ctdb_vnn *vnn;
2363 if (ctdb->tunable.disable_ip_failover == 1) {
2367 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2368 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2369 ctdb_vnn_unassign_iface(ctdb, vnn);
2376 /* Don't allow multiple releases at once. Some code,
2377 * particularly ctdb_tickle_sentenced_connections() is
2379 if (vnn->update_in_flight) {
2380 DEBUG(DEBUG_WARNING,
2382 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2383 ctdb_addr_to_str(&vnn->public_address),
2384 vnn->public_netmask_bits,
2385 ctdb_vnn_iface_string(vnn)));
2388 vnn->update_in_flight = true;
2390 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2391 ctdb_addr_to_str(&vnn->public_address),
2392 vnn->public_netmask_bits,
2393 ctdb_vnn_iface_string(vnn)));
2395 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2396 ctdb_vnn_iface_string(vnn),
2397 ctdb_addr_to_str(&vnn->public_address),
2398 vnn->public_netmask_bits);
// The message payload is a temporary copy of the address string; it is
// freed again right after sending.
2400 data.dptr = (uint8_t *)talloc_strdup(
2401 vnn, ctdb_addr_to_str(&vnn->public_address));
2402 if (data.dptr != NULL) {
2403 data.dsize = strlen((char *)data.dptr) + 1;
2404 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2405 CTDB_SRVID_RELEASE_IP, data);
2406 talloc_free(data.dptr);
2409 ctdb_vnn_unassign_iface(ctdb, vnn);
2410 vnn->update_in_flight = false;
2414 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2419 get list of public IPs
// Control handler returning the public IPs configured on this node.
//
// ctdb    - daemon context
// c       - request; the flag CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE limits
//           the reply to vnns for which ctdb_vnn_available is true
// outdata - receives the reply; the buffer is a talloc child of outdata
//
// The buffer is sized for every vnn (num entries) and zero-filled, then
// filled with pnn and address per reported vnn. The reported dsize is
// recomputed from the number of entries actually written (i), so it can be
// smaller than the allocation when entries are filtered out.
// NOTE(review): the counter updates and the return statement are missing
// from this listing.
2421 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2422 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2425 struct ctdb_public_ip_list_old *ips;
2426 struct ctdb_vnn *vnn;
2427 bool only_available = false;
2429 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2430 only_available = true;
2433 /* count how many public ip structures we have */
2435 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2439 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2440 num*sizeof(struct ctdb_public_ip);
2441 ips = talloc_zero_size(outdata, len);
2442 CTDB_NO_MEMORY(ctdb, ips);
2445 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2446 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2449 ips->ips[i].pnn = vnn->pnn;
2450 ips->ips[i].addr = vnn->public_address;
2454 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2455 i*sizeof(struct ctdb_public_ip);
2457 outdata->dsize = len;
2458 outdata->dptr = (uint8_t *)ips;
// Control handler returning details of one public IP and its interfaces.
//
// ctdb    - daemon context
// c       - request header
// indata  - carries the ctdb_sock_addr to look up (parameter line missing
//           from this listing; it is read via indata.dptr below)
// outdata - receives the reply; the buffer is a talloc child of outdata
//
// The address is looked up with find_public_ip_vnn, with
// ctdb->single_ip_vnn as a fallback candidate. An address that is not
// public is logged as an error. The reply holds the address, its pnn and
// one entry per name in vnn->ifaces (name, link state, reference count).
// active_idx starts as 0xFFFFFFFF and becomes the index of the interface
// equal to vnn->iface. Names are copied with strncpy and then explicitly
// NUL-terminated in the last byte.
// NOTE(review): NULL checks on vnn and cur and all return statements are
// missing from this listing.
2464 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2465 struct ctdb_req_control_old *c,
2470 ctdb_sock_addr *addr;
2471 struct ctdb_public_ip_info_old *info;
2472 struct ctdb_vnn *vnn;
2474 addr = (ctdb_sock_addr *)indata.dptr;
2476 vnn = find_public_ip_vnn(ctdb, addr);
2478 /* if it is not a public ip it could be our 'single ip' */
2479 if (ctdb->single_ip_vnn) {
2480 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2481 vnn = ctdb->single_ip_vnn;
2486 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2487 "'%s'not a public address\n",
2488 ctdb_addr_to_str(addr)));
2492 /* count how many public ip structures we have */
2494 for (;vnn->ifaces[num];) {
2498 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2499 num*sizeof(struct ctdb_iface);
2500 info = talloc_zero_size(outdata, len);
2501 CTDB_NO_MEMORY(ctdb, info);
2503 info->ip.addr = vnn->public_address;
2504 info->ip.pnn = vnn->pnn;
2505 info->active_idx = 0xFFFFFFFF;
2507 for (i=0; vnn->ifaces[i]; i++) {
2508 struct ctdb_interface *cur;
2510 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2512 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2516 if (vnn->iface == cur) {
2517 info->active_idx = i;
2519 strncpy(info->ifaces[i].name, cur->name,
2520 sizeof(info->ifaces[i].name));
2521 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2522 info->ifaces[i].link_state = cur->link_up;
2523 info->ifaces[i].references = cur->references;
2526 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2527 i*sizeof(struct ctdb_iface);
2529 outdata->dsize = len;
2530 outdata->dptr = (uint8_t *)info;
// Control handler returning every interface known to this daemon.
//
// ctdb    - daemon context; ctdb->ifaces is the list that is reported
// c       - request header
// outdata - receives the reply; the buffer is a talloc child of outdata
//
// The buffer is sized from a first pass over ctdb->ifaces and zero-filled.
// A second pass copies name, link state and reference count per interface;
// names are copied with strncpy and explicitly NUL-terminated in the last
// byte. The reported dsize is computed from the number of entries written.
// NOTE(review): counter updates and the return statement are missing from
// this listing.
2535 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2536 struct ctdb_req_control_old *c,
2540 struct ctdb_iface_list_old *ifaces;
2541 struct ctdb_interface *cur;
2543 /* count how many public ip structures we have */
2545 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2549 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2550 num*sizeof(struct ctdb_iface);
2551 ifaces = talloc_zero_size(outdata, len);
2552 CTDB_NO_MEMORY(ctdb, ifaces);
2555 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2556 strncpy(ifaces->ifaces[i].name, cur->name,
2557 sizeof(ifaces->ifaces[i].name));
2558 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2559 ifaces->ifaces[i].link_state = cur->link_up;
2560 ifaces->ifaces[i].references = cur->references;
2564 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2565 i*sizeof(struct ctdb_iface);
2567 outdata->dsize = len;
2568 outdata->dptr = (uint8_t *)ifaces;
// Control handler that sets the recorded link state of one interface.
//
// ctdb   - daemon context
// c      - request header
// indata - carries a struct ctdb_iface (read via indata.dptr below)
//
// Validation visible here: the byte at name[CTDB_IFACE_SIZE] must be NUL,
// link_state must be one of the values accepted by the switch, and
// references must be 0. The interface is then looked up by name. If the
// requested state equals the current one nothing changes; otherwise the
// change is logged (error level when the link was up, notice level when it
// was down) and iface->link_up is updated.
// NOTE(review): the accepted link_state values and all return statements
// are on lines missing from this listing.
2573 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2574 struct ctdb_req_control_old *c,
2577 struct ctdb_iface *info;
2578 struct ctdb_interface *iface;
2579 bool link_up = false;
2581 info = (struct ctdb_iface *)indata.dptr;
2583 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2584 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2585 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2586 len, len, info->name));
2590 switch (info->link_state) {
2598 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2599 (unsigned int)info->link_state));
2603 if (info->references != 0) {
2604 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2605 (unsigned int)info->references));
2609 iface = ctdb_find_iface(ctdb, info->name);
2610 if (iface == NULL) {
2614 if (link_up == iface->link_up) {
2618 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2619 ("iface[%s] has changed it's link status %s => %s\n",
2621 iface->link_up?"up":"down",
2622 link_up?"up":"down"));
2624 iface->link_up = link_up;
2630 structure containing the listening socket and the list of tcp connections
2631 that the ctdb daemon is to kill
// Bookkeeping for the set of TCP connections the daemon is trying to kill.
// vnn and ctdb point back at the owning public address and daemon context;
// fde is a tevent fd handle; connections is a tree of struct
// ctdb_killtcp_con looked up by the key built in killtcp_key.
// NOTE(review): further members (capture_fd and private_data are used by
// capture_tcp_handler) sit on lines missing from this listing.
2633 struct ctdb_kill_tcp {
2634 struct ctdb_vnn *vnn;
2635 struct ctdb_context *ctdb;
2637 struct tevent_fd *fde;
2638 trbt_tree_t *connections;
2643 a tcp connection that is to be killed
// One connection queued for killing: its source and destination addresses
// and a back pointer to the owning ctdb_kill_tcp.
// NOTE(review): a count member is used by tickle_connection_traverse but
// its declaration is on a line missing from this listing.
2645 struct ctdb_killtcp_con {
2646 ctdb_sock_addr src_addr;
2647 ctdb_sock_addr dst_addr;
2649 struct ctdb_kill_tcp *killtcp;
2652 /* this function is used to create a key to represent this socketpair
2653 in the killtcp tree.
2654 this key is used to insert and lookup matching socketpairs that are
2655 to be tickled and RST
// Number of 32-bit words in a killtcp tree key.
2657 #define KILLTCP_KEYLEN 10
// Build the tree key for a source/destination socket pair.
//
// src, dst - endpoints; both must have the same address family, otherwise
//            an error is logged
//
// The key lives in a function-local static array that is zeroed on every
// call, so any pointer to it is overwritten by the next call and the
// function is not reentrant. The branch using the ip members fills words
// 0-3 (dst address, src address, dst port, src port) and leaves the rest
// zero. The branch using the ip6 members fills all ten words, interleaving
// the four 32-bit pieces of each address from last to first and ending
// with the two ports.
// NOTE(review): case labels and return statements are missing from this
// listing.
2658 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2660 static uint32_t key[KILLTCP_KEYLEN];
2662 bzero(key, sizeof(key));
2664 if (src->sa.sa_family != dst->sa.sa_family) {
2665 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2669 switch (src->sa.sa_family) {
2671 key[0] = dst->ip.sin_addr.s_addr;
2672 key[1] = src->ip.sin_addr.s_addr;
2673 key[2] = dst->ip.sin_port;
2674 key[3] = src->ip.sin_port;
2677 uint32_t *dst6_addr32 =
2678 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
2679 uint32_t *src6_addr32 =
2680 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
2681 key[0] = dst6_addr32[3];
2682 key[1] = src6_addr32[3];
2683 key[2] = dst6_addr32[2];
2684 key[3] = src6_addr32[2];
2685 key[4] = dst6_addr32[1];
2686 key[5] = src6_addr32[1];
2687 key[6] = dst6_addr32[0];
2688 key[7] = src6_addr32[0];
2689 key[8] = dst->ip6.sin6_port;
2690 key[9] = src->ip6.sin6_port;
2694 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2702 called when we get a read event on the raw socket
// tevent fd handler for the capture socket.
//
// ev, fde      - tevent context and fd event (unused in the visible lines)
// flags        - only TEVENT_FD_READ is acted on
// private_data - the struct ctdb_kill_tcp owning the capture socket
//
// A packet is read with ctdb_sys_read_tcp_packet; packets it rejects are
// ignored. The src/dst pair is looked up in killtcp->connections using
// killtcp_key. For a known connection a message announcing a tcp reset is
// logged and ctdb_sys_send_tcp is called with the endpoints swapped
// relative to the stored connection and the captured ack_seq and seq.
// NOTE(review): the final argument 1 presumably requests a reset, going by
// the log message; the handling of the entry afterwards is on lines
// missing from this listing.
2704 static void capture_tcp_handler(struct tevent_context *ev,
2705 struct tevent_fd *fde,
2706 uint16_t flags, void *private_data)
2708 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2709 struct ctdb_killtcp_con *con;
2710 ctdb_sock_addr src, dst;
2711 uint32_t ack_seq, seq;
2713 if (!(flags & TEVENT_FD_READ)) {
2717 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2718 killtcp->private_data,
2720 &ack_seq, &seq) != 0) {
2721 /* probably a non-tcp ACK packet */
2725 /* check if we have this guy in our list of connections
2728 con = trbt_lookuparray32(killtcp->connections,
2729 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2731 /* no this was some other packet we can just ignore */
2735 /* This one has been tickled !
2736 now reset him and remove him from the list.
2738 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2739 ntohs(con->dst_addr.ip.sin_port),
2740 ctdb_addr_to_str(&con->src_addr),
2741 ntohs(con->src_addr.ip.sin_port)));
2743 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2748 /* when traversing the list of all tcp connections to send tickle acks to
2749 (so that we can capture the ack coming back and kill the connection
2751 this callback is called for each connection we are currently trying to kill
2753 static int tickle_connection_traverse(void *param, void *data)
2755 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2757 /* have tried too many times, just give up */
2758 if (con->count >= 5) {
2759 /* can't delete in traverse: reparent to delete_cons */
2760 talloc_steal(param, con);
2764 /* othervise, try tickling it again */
2767 (ctdb_sock_addr *)&con->dst_addr,
2768 (ctdb_sock_addr *)&con->src_addr,
2775 called every second until all sentenced connections have been reset
2777 static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
2778 struct tevent_timer *te,
2779 struct timeval t, void *private_data)
2781 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2782 void *delete_cons = talloc_new(NULL);
2784 /* loop over all connections sending tickle ACKs */
2785 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2787 /* now we've finished traverse, it's safe to do deletion. */
2788 talloc_free(delete_cons);
2790 /* If there are no more connections to kill we can remove the
2791 entire killtcp structure
2793 if ( (killtcp->connections == NULL) ||
2794 (killtcp->connections->root == NULL) ) {
2795 talloc_free(killtcp);
2799 /* try tickling them again in a seconds time
2801 tevent_add_timer(killtcp->ctdb->ev, killtcp,
2802 timeval_current_ofs(1, 0),
2803 ctdb_tickle_sentenced_connections, killtcp);
2807 destroy the killtcp structure
2809 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2811 struct ctdb_vnn *tmpvnn;
2813 /* verify that this vnn is still active */
2814 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2815 if (tmpvnn == killtcp->vnn) {
2820 if (tmpvnn == NULL) {
2824 if (killtcp->vnn->killtcp != killtcp) {
2828 killtcp->vnn->killtcp = NULL;
/* Insert callback for the killtcp tree.

   Nothing fancy here, just unconditionally replace any existing
   connection structure with the new one: parm (the new entry) is
   returned and data (the old entry, if any) is ignored.

   Don't even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new
   data is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
2847 add a tcp socket to the list of connections we want to RST
2849 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2853 ctdb_sock_addr src, dst;
2854 struct ctdb_kill_tcp *killtcp;
2855 struct ctdb_killtcp_con *con;
2856 struct ctdb_vnn *vnn;
2858 ctdb_canonicalize_ip(s, &src);
2859 ctdb_canonicalize_ip(d, &dst);
2861 vnn = find_public_ip_vnn(ctdb, &dst);
2863 vnn = find_public_ip_vnn(ctdb, &src);
2866 /* if it is not a public ip it could be our 'single ip' */
2867 if (ctdb->single_ip_vnn) {
2868 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2869 vnn = ctdb->single_ip_vnn;
2874 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2878 killtcp = vnn->killtcp;
2880 /* If this is the first connection to kill we must allocate
2883 if (killtcp == NULL) {
2884 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
2885 CTDB_NO_MEMORY(ctdb, killtcp);
2888 killtcp->ctdb = ctdb;
2889 killtcp->capture_fd = -1;
2890 killtcp->connections = trbt_create(killtcp, 0);
2892 vnn->killtcp = killtcp;
2893 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2898 /* create a structure that describes this connection we want to
2899 RST and store it in killtcp->connections
2901 con = talloc(killtcp, struct ctdb_killtcp_con);
2902 CTDB_NO_MEMORY(ctdb, con);
2903 con->src_addr = src;
2904 con->dst_addr = dst;
2906 con->killtcp = killtcp;
2909 trbt_insertarray32_callback(killtcp->connections,
2910 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2911 add_killtcp_callback, con);
2914 If we don't have a socket to listen on yet we must create it
2916 if (killtcp->capture_fd == -1) {
2917 const char *iface = ctdb_vnn_iface_string(vnn);
2918 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2919 if (killtcp->capture_fd == -1) {
2920 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2921 "socket on iface '%s' for killtcp (%s)\n",
2922 iface, strerror(errno)));
2928 if (killtcp->fde == NULL) {
2929 killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
2930 killtcp->capture_fd,
2932 capture_tcp_handler, killtcp);
2933 tevent_fd_set_auto_close(killtcp->fde);
2935 /* We also need to set up some events to tickle all these connections
2936 until they are all reset
2938 tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2939 ctdb_tickle_sentenced_connections, killtcp);
2942 /* tickle him once now */
2951 talloc_free(vnn->killtcp);
2952 vnn->killtcp = NULL;
2957 kill a TCP connection.
2959 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2961 struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
2963 return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
2967 called by a daemon to inform us of the entire list of TCP tickles for
2968 a particular public address.
2969 this control should only be sent by the node that is currently serving
2970 that public address.
2972 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2974 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2975 struct ctdb_tcp_array *tcparray;
2976 struct ctdb_vnn *vnn;
2978 /* We must at least have tickles.num or else we cant verify the size
2979 of the received data blob
2981 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2982 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2986 /* verify that the size of data matches what we expect */
2987 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2988 + sizeof(struct ctdb_connection) * list->num) {
2989 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2993 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2994 ctdb_addr_to_str(&list->addr)));
2996 vnn = find_public_ip_vnn(ctdb, &list->addr);
2998 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2999 ctdb_addr_to_str(&list->addr)));
3004 if (vnn->pnn == ctdb->pnn) {
3006 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
3007 ctdb_addr_to_str(&list->addr)));
3011 /* remove any old ticklelist we might have */
3012 talloc_free(vnn->tcp_array);
3013 vnn->tcp_array = NULL;
3015 tcparray = talloc(vnn, struct ctdb_tcp_array);
3016 CTDB_NO_MEMORY(ctdb, tcparray);
3018 tcparray->num = list->num;
3020 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
3021 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3023 memcpy(tcparray->connections, &list->connections[0],
3024 sizeof(struct ctdb_connection)*tcparray->num);
3026 /* We now have a new fresh tickle list array for this vnn */
3027 vnn->tcp_array = tcparray;
3033 called to return the full list of tickles for the puclic address associated
3034 with the provided vnn
3036 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3038 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3039 struct ctdb_tickle_list_old *list;
3040 struct ctdb_tcp_array *tcparray;
3042 struct ctdb_vnn *vnn;
3044 vnn = find_public_ip_vnn(ctdb, addr);
3046 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3047 ctdb_addr_to_str(addr)));
3052 tcparray = vnn->tcp_array;
3054 num = tcparray->num;
3059 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
3060 + sizeof(struct ctdb_connection) * num;
3062 outdata->dptr = talloc_size(outdata, outdata->dsize);
3063 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3064 list = (struct ctdb_tickle_list_old *)outdata->dptr;
3069 memcpy(&list->connections[0], tcparray->connections,
3070 sizeof(struct ctdb_connection) * num);
3078 set the list of all tcp tickles for a public address
3080 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
3081 ctdb_sock_addr *addr,
3082 struct ctdb_tcp_array *tcparray)
3086 struct ctdb_tickle_list_old *list;
3089 num = tcparray->num;
3094 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
3095 sizeof(struct ctdb_connection) * num;
3096 data.dptr = talloc_size(ctdb, data.dsize);
3097 CTDB_NO_MEMORY(ctdb, data.dptr);
3099 list = (struct ctdb_tickle_list_old *)data.dptr;
3103 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
3106 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
3107 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3108 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3110 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3114 talloc_free(data.dptr);
3121 perform tickle updates if required
3123 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
3124 struct tevent_timer *te,
3125 struct timeval t, void *private_data)
3127 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3129 struct ctdb_vnn *vnn;
3131 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3132 /* we only send out updates for public addresses that
3135 if (ctdb->pnn != vnn->pnn) {
3138 /* We only send out the updates if we need to */
3139 if (!vnn->tcp_update_needed) {
3142 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
3143 &vnn->public_address,
3146 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3147 ctdb_addr_to_str(&vnn->public_address)));
3150 ("Sent tickle update for public address %s\n",
3151 ctdb_addr_to_str(&vnn->public_address)));
3152 vnn->tcp_update_needed = false;
3156 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3157 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3158 ctdb_update_tcp_tickles, ctdb);
3162 start periodic update of tcp tickles
3164 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3166 ctdb->tickle_update_context = talloc_new(ctdb);
3168 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3169 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3170 ctdb_update_tcp_tickles, ctdb);
3176 struct control_gratious_arp {
3177 struct ctdb_context *ctdb;
3178 ctdb_sock_addr addr;
3184 send a control_gratuitous arp
3186 static void send_gratious_arp(struct tevent_context *ev,
3187 struct tevent_timer *te,
3188 struct timeval t, void *private_data)
3191 struct control_gratious_arp *arp = talloc_get_type(private_data,
3192 struct control_gratious_arp);
3194 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3196 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3197 arp->iface, strerror(errno)));
3202 if (arp->count == CTDB_ARP_REPEAT) {
3207 tevent_add_timer(arp->ctdb->ev, arp,
3208 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3209 send_gratious_arp, arp);
3216 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3218 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
3219 struct control_gratious_arp *arp;
3221 /* verify the size of indata */
3222 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3223 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3224 (unsigned)indata.dsize,
3225 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
3229 ( offsetof(struct ctdb_addr_info_old, iface)
3230 + gratious_arp->len ) ){
3232 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3233 "but should be %u bytes\n",
3234 (unsigned)indata.dsize,
3235 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
3240 arp = talloc(ctdb, struct control_gratious_arp);
3241 CTDB_NO_MEMORY(ctdb, arp);
3244 arp->addr = gratious_arp->addr;
3245 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3246 CTDB_NO_MEMORY(ctdb, arp->iface);
3249 tevent_add_timer(arp->ctdb->ev, arp,
3250 timeval_zero(), send_gratious_arp, arp);
3255 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3257 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3260 /* verify the size of indata */
3261 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3262 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3266 ( offsetof(struct ctdb_addr_info_old, iface)
3269 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3270 "but should be %u bytes\n",
3271 (unsigned)indata.dsize,
3272 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3276 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
3278 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3281 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/* State carried to delete_ip_callback() for an asynchronous IP delete */
struct delete_ip_callback_state {
	struct ctdb_req_control_old *c;	/* control request to reply to */
};
3293 called when releaseip event finishes for del_public_address
3295 static void delete_ip_callback(struct ctdb_context *ctdb,
3296 int32_t status, TDB_DATA data,
3297 const char *errormsg,
3300 struct delete_ip_callback_state *state =
3301 talloc_get_type(private_data, struct delete_ip_callback_state);
3303 /* If release failed then fail. */
3304 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
3305 talloc_free(private_data);
3308 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
3309 struct ctdb_req_control_old *c,
3310 TDB_DATA indata, bool *async_reply)
3312 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3313 struct ctdb_vnn *vnn;
3315 /* verify the size of indata */
3316 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3317 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3321 ( offsetof(struct ctdb_addr_info_old, iface)
3324 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3325 "but should be %u bytes\n",
3326 (unsigned)indata.dsize,
3327 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3331 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
3333 /* walk over all public addresses until we find a match */
3334 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3335 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3336 if (vnn->pnn == ctdb->pnn) {
3337 struct delete_ip_callback_state *state;
3338 struct ctdb_public_ip *ip;
3342 vnn->delete_pending = true;
3344 state = talloc(ctdb,
3345 struct delete_ip_callback_state);
3346 CTDB_NO_MEMORY(ctdb, state);
3349 ip = talloc(state, struct ctdb_public_ip);
3352 (__location__ " Out of memory\n"));
3357 ip->addr = pub->addr;
3359 data.dsize = sizeof(struct ctdb_public_ip);
3360 data.dptr = (unsigned char *)ip;
3362 ret = ctdb_daemon_send_control(ctdb,
3365 CTDB_CONTROL_RELEASE_IP,
3372 (__location__ "Unable to send "
3373 "CTDB_CONTROL_RELEASE_IP\n"));
3378 state->c = talloc_steal(state, c);
3379 *async_reply = true;
3381 /* This IP is not hosted on the
3382 * current node so just delete it
3384 do_delete_ip(ctdb, vnn);
3391 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3392 ctdb_addr_to_str(&pub->addr)));
/* State carried to ctdb_ipreallocated_callback() */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* control request to reply to */
};
3401 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3402 int status, void *p)
3404 struct ipreallocated_callback_state *state =
3405 talloc_get_type(p, struct ipreallocated_callback_state);
3409 (" \"ipreallocated\" event script failed (status %d)\n",
3411 if (status == -ETIME) {
3412 ctdb_ban_self(ctdb);
3416 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3420 /* A control to run the ipreallocated event */
3421 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3422 struct ctdb_req_control_old *c,
3426 struct ipreallocated_callback_state *state;
3428 state = talloc(ctdb, struct ipreallocated_callback_state);
3429 CTDB_NO_MEMORY(ctdb, state);
3431 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3433 ret = ctdb_event_script_callback(ctdb, state,
3434 ctdb_ipreallocated_callback, state,
3435 CTDB_EVENT_IPREALLOCATED,
3439 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3444 /* tell the control that we will be reply asynchronously */
3445 state->c = talloc_steal(state, c);
3446 *async_reply = true;
3452 /* This function is called from the recovery daemon to verify that a remote
3453 node has the expected ip allocation.
3454 This is verified against ctdb->ip_tree
3456 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3457 struct ctdb_public_ip_list *ips,
3460 struct public_ip_list *tmp_ip;
3463 if (ctdb->ip_tree == NULL) {
3464 /* don't know the expected allocation yet, assume remote node
3473 for (i=0; i<ips->num; i++) {
3474 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3475 if (tmp_ip == NULL) {
3476 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3480 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3484 if (tmp_ip->pnn != ips->ip[i].pnn) {
3486 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3488 ctdb_addr_to_str(&ips->ip[i].addr),
3489 ips->ip[i].pnn, tmp_ip->pnn));
3497 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3499 struct public_ip_list *tmp_ip;
3501 /* IP tree is never built if DisableIPFailover is set */
3502 if (ctdb->tunable.disable_ip_failover != 0) {
3506 if (ctdb->ip_tree == NULL) {
3507 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3511 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3512 if (tmp_ip == NULL) {
3513 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3517 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3518 tmp_ip->pnn = ip->pnn;
3523 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3525 TALLOC_FREE(ctdb->ip_tree);
/* State for one in-progress reload of the public addresses file */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* owning daemon context */
	struct ctdb_req_control_old *c;	/* control to reply to, or NULL */
	int status;			/* status sent back in the reply */
	int fd[2];			/* pipe: child writes its result to fd[1] */
	pid_t child;			/* pid of the reloadips child */
	struct tevent_fd *fde;		/* read event on fd[0] */
};
3537 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3539 if (h == h->ctdb->reload_ips) {
3540 h->ctdb->reload_ips = NULL;
3543 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3546 ctdb_kill(h->ctdb, h->child, SIGKILL);
3550 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3551 struct tevent_timer *te,
3552 struct timeval t, void *private_data)
3554 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3559 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3560 struct tevent_fd *fde,
3561 uint16_t flags, void *private_data)
3563 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3568 ret = sys_read(h->fd[0], &res, 1);
3569 if (ret < 1 || res != 0) {
3570 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3578 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3580 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3581 struct ctdb_public_ip_list_old *ips;
3582 struct ctdb_vnn *vnn;
3583 struct client_async_data *async_data;
3584 struct timeval timeout;
3586 struct ctdb_client_control_state *state;
3590 CTDB_NO_MEMORY(ctdb, mem_ctx);
3592 /* Read IPs from local node */
3593 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3594 CTDB_CURRENT_NODE, mem_ctx, &ips);
3597 ("Unable to fetch public IPs from local node\n"));
3598 talloc_free(mem_ctx);
3602 /* Read IPs file - this is safe since this is a child process */
3604 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3605 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3606 talloc_free(mem_ctx);
3610 async_data = talloc_zero(mem_ctx, struct client_async_data);
3611 CTDB_NO_MEMORY(ctdb, async_data);
3613 /* Compare IPs between node and file for IPs to be deleted */
3614 for (i = 0; i < ips->num; i++) {
3616 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3617 if (ctdb_same_ip(&vnn->public_address,
3618 &ips->ips[i].addr)) {
3619 /* IP is still in file */
3625 /* Delete IP ips->ips[i] */
3626 struct ctdb_addr_info_old *pub;
3629 ("IP %s no longer configured, deleting it\n",
3630 ctdb_addr_to_str(&ips->ips[i].addr)));
3632 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3633 CTDB_NO_MEMORY(ctdb, pub);
3635 pub->addr = ips->ips[i].addr;
3639 timeout = TAKEOVER_TIMEOUT();
3641 data.dsize = offsetof(struct ctdb_addr_info_old,
3643 data.dptr = (uint8_t *)pub;
3645 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3646 CTDB_CONTROL_DEL_PUBLIC_IP,
3647 0, data, async_data,
3649 if (state == NULL) {
3652 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3656 ctdb_client_async_add(async_data, state);
3660 /* Compare IPs between node and file for IPs to be added */
3662 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3663 for (i = 0; i < ips->num; i++) {
3664 if (ctdb_same_ip(&vnn->public_address,
3665 &ips->ips[i].addr)) {
3666 /* IP already on node */
3670 if (i == ips->num) {
3671 /* Add IP ips->ips[i] */
3672 struct ctdb_addr_info_old *pub;
3673 const char *ifaces = NULL;
3678 ("New IP %s configured, adding it\n",
3679 ctdb_addr_to_str(&vnn->public_address)));
3681 uint32_t pnn = ctdb_get_pnn(ctdb);
3683 data.dsize = sizeof(pnn);
3684 data.dptr = (uint8_t *)&pnn;
3686 ret = ctdb_client_send_message(
3688 CTDB_BROADCAST_CONNECTED,
3689 CTDB_SRVID_REBALANCE_NODE,
3692 DEBUG(DEBUG_WARNING,
3693 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3699 ifaces = vnn->ifaces[0];
3701 while (vnn->ifaces[iface] != NULL) {
3702 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3703 vnn->ifaces[iface]);
3707 len = strlen(ifaces) + 1;
3708 pub = talloc_zero_size(mem_ctx,
3709 offsetof(struct ctdb_addr_info_old, iface) + len);
3710 CTDB_NO_MEMORY(ctdb, pub);
3712 pub->addr = vnn->public_address;
3713 pub->mask = vnn->public_netmask_bits;
3715 memcpy(&pub->iface[0], ifaces, pub->len);
3717 timeout = TAKEOVER_TIMEOUT();
3719 data.dsize = offsetof(struct ctdb_addr_info_old,
3721 data.dptr = (uint8_t *)pub;
3723 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3724 CTDB_CONTROL_ADD_PUBLIC_IP,
3725 0, data, async_data,
3727 if (state == NULL) {
3730 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3734 ctdb_client_async_add(async_data, state);
3738 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3739 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3743 talloc_free(mem_ctx);
3747 talloc_free(mem_ctx);
3751 /* This control is sent to force the node to re-read the public addresses file
3752 and drop any addresses we should nnot longer host, and add new addresses
3753 that we are now able to host
3755 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3757 struct ctdb_reloadips_handle *h;
3758 pid_t parent = getpid();
3760 if (ctdb->reload_ips != NULL) {
3761 talloc_free(ctdb->reload_ips);
3762 ctdb->reload_ips = NULL;
3765 h = talloc(ctdb, struct ctdb_reloadips_handle);
3766 CTDB_NO_MEMORY(ctdb, h);
3771 if (pipe(h->fd) == -1) {
3772 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3777 h->child = ctdb_fork(ctdb);
3778 if (h->child == (pid_t)-1) {
3779 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3787 if (h->child == 0) {
3788 signed char res = 0;
3791 debug_extra = talloc_asprintf(NULL, "reloadips:");
3793 prctl_set_comment("ctdb_reloadips");
3794 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3795 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3798 res = ctdb_reloadips_child(ctdb);
3800 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3804 sys_write(h->fd[1], &res, 1);
3805 ctdb_wait_for_process_to_exit(parent);
3809 h->c = talloc_steal(h, c);
3812 set_close_on_exec(h->fd[0]);
3814 talloc_set_destructor(h, ctdb_reloadips_destructor);
3817 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3818 ctdb_reloadips_child_handler, (void *)h);
3819 tevent_fd_set_auto_close(h->fde);
3821 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3822 ctdb_reloadips_timeout_event, h);
3824 /* we reply later */
3825 *async_reply = true;