4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/* Timeout used for takeover-related controls: timeval_current_ofs() applied
 * to the takeover_timeout tunable. The expansion refers to a variable named
 * ctdb, so it can only be used where one is in scope. */
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument given to timeval_current_ofs() when ctdb_control_send_arp()
 * re-arms its timer. */
48 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() compares arp->count against this value. */
49 #define CTDB_ARP_REPEAT 3
/* One entry of the ctdb->ifaces list.
 * Code in this file also reads the members name, link_up and references;
 * their declarations are not part of this listing. */
51 struct ctdb_interface {
/* list links, maintained with DLIST_ADD / DLIST_REMOVE on ctdb->ifaces */
52 struct ctdb_interface *prev, *next;
/* NOTE(review): the opening line of this struct and several members used
 * elsewhere in the file (ifaces, pnn, delete_pending) are not part of this
 * listing; only the members below are visible. */
58 /* state associated with a public ip address */
/* list links for ctdb->vnn */
60 struct ctdb_vnn *prev, *next;
/* interface currently bound to this address; compared against NULL by
 * callers when no interface is assigned */
62 struct ctdb_interface *iface;
64 ctdb_sock_addr public_address;
65 uint8_t public_netmask_bits;
67 /* the node number that is serving this public address, if any.
68 If no node serves this ip it is set to -1 */
71 /* List of clients to tickle for this public address */
72 struct ctdb_tcp_array *tcp_array;
74 /* whether we need to update the other nodes with changes to our list
75 of connected clients */
76 bool tcp_update_needed;
78 /* a context to hang sending gratuitous arp events off */
79 TALLOC_CTX *takeover_ctx;
81 /* Set to true any time an update to this VNN is in flight.
82 This helps to avoid races. */
83 bool update_in_flight;
85 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86 * address then this flag is set. It will be deleted in the
87 * release IP callback. */
/* Return the name of the interface currently bound to vnn.
 * NOTE(review): only the vnn->iface->name return is visible here. Callers
 * invoke this in paths where vnn->iface may be NULL (for example right
 * before testing vnn->iface == NULL), so a NULL guard and fallback string
 * are presumably on the lines missing from this listing - confirm. */
91 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
94 return vnn->iface->name;
/* Register the interface named iface in ctdb->ifaces unless an entry with
 * the same name is already present.
 *
 * A new entry is a zeroed struct ctdb_interface allocated on ctdb, with the
 * name duplicated onto the entry itself, linked in with DLIST_ADD.
 * Returns an int status; the return statements are not part of this
 * listing (ctdb_add_public_address() stores the result in ret).
 *
 * NOTE(review): the length test uses '>' so a name of exactly
 * CTDB_IFACE_SIZE characters is accepted - confirm that leaves room for
 * the terminator wherever that size is used as a buffer length. */
100 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
102 struct ctdb_interface *i;
104 if (strlen(iface) > CTDB_IFACE_SIZE) {
105 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
109 /* Verify that we don't have an entry for this interface yet */
110 for (i=ctdb->ifaces;i;i=i->next) {
111 if (strcmp(i->name, iface) == 0) {
116 /* create a new structure for this interface */
117 i = talloc_zero(ctdb, struct ctdb_interface);
118 CTDB_NO_MEMORY_FATAL(ctdb, i);
119 i->name = talloc_strdup(i, iface);
120 CTDB_NO_MEMORY(ctdb, i->name);
124 DLIST_ADD(ctdb->ifaces, i);
/* Scan the NULL-terminated vnn->ifaces array for an entry that compares
 * equal (strcmp) to name. Returns bool; the remaining parameter line and
 * the return statements are not part of this listing. */
129 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
134 for (n = 0; vnn->ifaces[n] != NULL; n++) {
135 if (strcmp(name, vnn->ifaces[n]) == 0) {
/* Review summary for ctdb_remove_orphaned_ifaces():
 * walks ctdb->ifaces; entries not named in vnn->ifaces are skipped; for the
 * others every VNN in ctdb->vnn is checked for the same interface name, and
 * the visible DLIST_REMOVE unlinks the entry when, per the in-code comment,
 * no VNN uses it. The loop advances through the local next, presumably so
 * the current entry can be unlinked safely.
 * NOTE(review): the terminator of the long comment below and several
 * statements (assignment of next, declaration of tv, any free of the
 * removed entry) are not part of this listing. */
143 /* If any interfaces now have no possible IPs then delete them. This
144 * implementation is naive (i.e. simple) rather than clever
145 * (i.e. complex). Given that this is run on delip and that operation
146 * is rare, this doesn't need to be efficient - it needs to be
147 * foolproof. One alternative is reference counting, where the logic
148 * is distributed and can, therefore, be broken in multiple places.
149 * Another alternative is to build a red-black tree of interfaces that
150 * can have addresses (by walking ctdb->vnn once) and then walking
151 * ctdb->ifaces once and deleting those not in the tree. Let's go to
152 * one of those if the naive implementation causes problems... :-)
154 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
155 struct ctdb_vnn *vnn)
157 struct ctdb_interface *i, *next;
159 /* For each interface, check if there's an IP using it. */
160 for (i = ctdb->ifaces; i != NULL; i = next) {
165 /* Only consider interfaces named in the given VNN. */
166 if (!vnn_has_interface_with_name(vnn, i->name)) {
170 /* Search for a vnn with this interface. */
172 for (tv=ctdb->vnn; tv; tv=tv->next) {
173 if (vnn_has_interface_with_name(tv, i->name)) {
180 /* None of the VNNs are using this interface. */
181 DLIST_REMOVE(ctdb->ifaces, i);
/* Walk ctdb->ifaces looking for the entry whose name compares equal
 * (strcmp) to iface. The return type is a struct ctdb_interface pointer;
 * the second parameter line and the return statements are not part of
 * this listing. */
188 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
191 struct ctdb_interface *i;
193 for (i=ctdb->ifaces;i;i=i->next) {
194 if (strcmp(i->name, iface) == 0) {
/* Pick an interface for vnn from the names listed in vnn->ifaces.
 * Each name is resolved with ctdb_find_iface(); a candidate with fewer
 * references than the current best is preferred. Callers treat a NULL
 * result as no usable interface.
 * NOTE(review): the lines between the lookup and the comparison are not
 * part of this listing; presumably they skip unresolved or link-down
 * candidates and seed best - confirm. */
202 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
203 struct ctdb_vnn *vnn)
206 struct ctdb_interface *cur = NULL;
207 struct ctdb_interface *best = NULL;
209 for (i=0; vnn->ifaces[i]; i++) {
211 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
225 if (cur->references < best->references) {
/* Bind vnn to an interface chosen by ctdb_vnn_best_iface() and record this
 * node (ctdb->pnn) as the one serving the address.
 * Logs three situations: the address is still assigned to an interface, no
 * interface could be chosen, and the new assignment. Returns an int32_t
 * status that callers test; the conditions and return statements are not
 * part of this listing.
 * NOTE(review): the error text "cannot assign to iface any iface" has a
 * duplicated word; it is a runtime string and is left untouched here. */
234 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
235 struct ctdb_vnn *vnn)
237 struct ctdb_interface *best = NULL;
240 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241 "still assigned to iface '%s'\n",
242 ctdb_addr_to_str(&vnn->public_address),
243 ctdb_vnn_iface_string(vnn)));
247 best = ctdb_vnn_best_iface(ctdb, vnn);
249 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
250 "cannot assign to iface any iface\n",
251 ctdb_addr_to_str(&vnn->public_address)));
257 vnn->pnn = ctdb->pnn;
259 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
260 "now assigned to iface '%s' refs[%d]\n",
261 ctdb_addr_to_str(&vnn->public_address),
262 ctdb_vnn_iface_string(vnn),
/* Drop the binding between vnn and its interface: log the old interface
 * and its reference count, then decrement that count. A further branch on
 * vnn->pnn == ctdb->pnn follows; its body is not part of this listing.
 * NOTE(review): the log expression tolerates vnn->iface == NULL while the
 * decrement dereferences vnn->iface unconditionally as shown. The guard is
 * presumably on the missing line between them - confirm. */
267 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
268 struct ctdb_vnn *vnn)
270 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
271 "now unassigned (old iface '%s' refs[%d])\n",
272 ctdb_addr_to_str(&vnn->public_address),
273 ctdb_vnn_iface_string(vnn),
274 vnn->iface?vnn->iface->references:0));
276 vnn->iface->references--;
279 if (vnn->pnn == ctdb->pnn) {
/* Decide whether this node could host vnn. The visible checks, in order:
 * the daemon runstate is CTDB_RUNSTATE_RUNNING, the VNN is not pending
 * deletion, the currently bound interface (if any) has link up, and then
 * each interface named in vnn->ifaces is resolved with ctdb_find_iface().
 * The result returned by each branch is not part of this listing. */
284 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
285 struct ctdb_vnn *vnn)
289 /* Nodes that are not RUNNING can not host IPs */
290 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
294 if (vnn->delete_pending) {
298 if (vnn->iface && vnn->iface->link_up) {
302 for (i=0; vnn->ifaces[i]; i++) {
303 struct ctdb_interface *cur;
305 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* Per-announcement state handed to ctdb_control_send_arp() as timer
 * private data. That handler also uses the members addr and count, whose
 * declarations are not part of this listing. */
318 struct ctdb_takeover_arp {
319 struct ctdb_context *ctdb;
/* connections to tickle; taken over from vnn->tcp_array in
 * ctdb_announce_vnn_iface() */
322 struct ctdb_tcp_array *tcparray;
323 struct ctdb_vnn *vnn;
328 lists of tcp endpoints
/* Doubly linked list node (prev/next) carrying one struct ctdb_connection. */
330 struct ctdb_tcp_list {
331 struct ctdb_tcp_list *prev, *next;
332 struct ctdb_connection connection;
336 list of clients to kill on IP release
/* Doubly linked list node (prev/next) with a back pointer to the ctdb
 * context. Remaining members are not part of this listing. */
338 struct ctdb_client_ip {
339 struct ctdb_client_ip *prev, *next;
340 struct ctdb_context *ctdb;
347 send a gratuitous arp
/* Timer handler driving the announcement of a taken-over address.
 * private_data is a struct ctdb_takeover_arp. On each run it:
 *  - calls ctdb_sys_send_arp() for arp->addr on the interface bound to
 *    arp->vnn, logging at CRIT with strerror(errno) in the failure path;
 *  - calls ctdb_sys_send_tcp() for every connection in arp->tcparray,
 *    logging at CRIT in the failure path;
 *  - tests arp->count against CTDB_ARP_REPEAT and otherwise re-arms itself
 *    with tevent_add_timer() on arp->vnn->takeover_ctx.
 * The ev, te and t parameters are not referenced in the visible lines.
 * NOTE(review): tcparray is dereferenced in the loop header; a NULL check
 * is presumably on a line missing from this listing - confirm. */
349 static void ctdb_control_send_arp(struct tevent_context *ev,
350 struct tevent_timer *te,
351 struct timeval t, void *private_data)
353 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
354 struct ctdb_takeover_arp);
356 struct ctdb_tcp_array *tcparray;
357 const char *iface = ctdb_vnn_iface_string(arp->vnn);
359 ret = ctdb_sys_send_arp(&arp->addr, iface);
361 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
362 iface, strerror(errno)));
365 tcparray = arp->tcparray;
367 for (i=0;i<tcparray->num;i++) {
368 struct ctdb_connection *tcon;
370 tcon = &tcparray->connections[i];
/* the message prints the destination port, then source address and port */
371 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
372 (unsigned)ntohs(tcon->dst.ip.sin_port),
373 ctdb_addr_to_str(&tcon->src),
374 (unsigned)ntohs(tcon->src.ip.sin_port)));
375 ret = ctdb_sys_send_tcp(
380 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
381 ctdb_addr_to_str(&tcon->src)));
388 if (arp->count == CTDB_ARP_REPEAT) {
/* schedule the next round */
393 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
394 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
395 ctdb_control_send_arp, arp);
/* Start announcing vnn on its interface.
 * Creates vnn->takeover_ctx (child of vnn) when absent, allocates a zeroed
 * struct ctdb_takeover_arp on it, copies the public address into it, moves
 * the VNN tcp_array under the new state with talloc_steal() (leaving
 * vnn->tcp_array NULL and tcp_update_needed set), and schedules
 * ctdb_control_send_arp() with a zero timeval.
 * Returns int32_t; callers store it in ret. Error paths and the return
 * statements are not part of this listing. */
398 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
399 struct ctdb_vnn *vnn)
401 struct ctdb_takeover_arp *arp;
402 struct ctdb_tcp_array *tcparray;
404 if (!vnn->takeover_ctx) {
405 vnn->takeover_ctx = talloc_new(vnn);
406 if (!vnn->takeover_ctx) {
411 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
417 arp->addr = vnn->public_address;
420 tcparray = vnn->tcp_array;
422 /* add all of the known tcp connections for this IP to the
423 list of tcp connections to send tickle acks for */
424 arp->tcparray = talloc_steal(arp, tcparray);
426 vnn->tcp_array = NULL;
427 vnn->tcp_update_needed = true;
430 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
431 timeval_zero(), ctdb_control_send_arp, arp);
/* State carried from ctdb_control_release_ip() to release_ip_callback(). */
436 struct takeover_callback_state {
/* the pending control to reply to */
437 struct ctdb_req_control_old *c;
/* copy of the address being released, allocated on this state */
438 ctdb_sock_addr *addr;
/* VNN whose update_in_flight flag the destructor clears; may be NULL */
439 struct ctdb_vnn *vnn;
/* State carried from ctdb_do_takeip() to ctdb_do_takeip_callback(): the
 * pending control to reply to and the VNN being taken over. */
442 struct ctdb_do_takeip_state {
443 struct ctdb_req_control_old *c;
444 struct ctdb_vnn *vnn;
448 called when takeip event finishes
/* Completion handler for the event started by ctdb_do_takeip().
 * private_data is a struct ctdb_do_takeip_state.
 * Failure path: a status of -ETIME gets special handling (body not part of
 * this listing), the failure is logged and the pending control is answered
 * with status.
 * Success path: when ctdb->do_checkpublicip is set the address is announced
 * through ctdb_announce_vnn_iface() (the control is answered with -1 in
 * the adjacent failure path); a CTDB_SRVID_TAKE_IP message carrying the
 * address string including its terminator is sent to this node; finally
 * the control is answered with 0. */
450 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
453 struct ctdb_do_takeip_state *state =
454 talloc_get_type(private_data, struct ctdb_do_takeip_state);
459 if (status == -ETIME) {
462 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
463 ctdb_addr_to_str(&state->vnn->public_address),
464 ctdb_vnn_iface_string(state->vnn)));
465 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
471 if (ctdb->do_checkpublicip) {
473 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
482 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
483 data.dsize = strlen((char *)data.dptr) + 1;
484 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
486 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
489 /* the control succeeded */
490 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_takeip(): clears the VNN
 * update_in_flight flag when the state is freed. The return statement is
 * not part of this listing. */
495 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
497 state->vnn->update_in_flight = false;
502 take over an ip address
/* Begin taking over the address of vnn on behalf of control c.
 * Steps visible here:
 *  - a log line for rejecting the request while another update for this
 *    VNN is in flight;
 *  - ctdb_vnn_assign_iface() to bind an interface, with an error log;
 *  - allocation of a struct ctdb_do_takeip_state on vnn;
 *  - c is re-parented onto ctdb with talloc_steal();
 *  - update_in_flight is set and ctdb_takeip_destructor() installed so the
 *    flag is cleared when the state is freed;
 *  - ctdb_event_script_callback() is started with
 *    ctdb_do_takeip_callback() as completion handler, with an error log
 *    for a failed start.
 * Returns int32_t; return statements, the event identifier and the
 * remaining assignments to state are not part of this listing. */
504 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
505 struct ctdb_req_control_old *c,
506 struct ctdb_vnn *vnn)
509 struct ctdb_do_takeip_state *state;
511 if (vnn->update_in_flight) {
512 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
513 "update for this IP already in flight\n",
514 ctdb_addr_to_str(&vnn->public_address),
515 vnn->public_netmask_bits));
519 ret = ctdb_vnn_assign_iface(ctdb, vnn);
521 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
522 "assign a usable interface\n",
523 ctdb_addr_to_str(&vnn->public_address),
524 vnn->public_netmask_bits));
528 state = talloc(vnn, struct ctdb_do_takeip_state);
529 CTDB_NO_MEMORY(ctdb, state);
531 state->c = talloc_steal(ctdb, c);
534 vnn->update_in_flight = true;
535 talloc_set_destructor(state, ctdb_takeip_destructor);
537 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
538 ctdb_addr_to_str(&vnn->public_address),
539 vnn->public_netmask_bits,
540 ctdb_vnn_iface_string(vnn)));
542 ret = ctdb_event_script_callback(ctdb,
544 ctdb_do_takeip_callback,
548 ctdb_vnn_iface_string(vnn),
549 ctdb_addr_to_str(&vnn->public_address),
550 vnn->public_netmask_bits);
553 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
554 ctdb_addr_to_str(&vnn->public_address),
555 ctdb_vnn_iface_string(vnn)));
/* State carried from ctdb_do_updateip() to ctdb_do_updateip_callback(). */
563 struct ctdb_do_updateip_state {
/* the pending control to reply to */
564 struct ctdb_req_control_old *c;
/* interface the address was bound to before the move; restored by the
 * callback in its failure path */
565 struct ctdb_interface *old;
566 struct ctdb_vnn *vnn;
570 called when updateip event finishes
/* Completion handler for the event started by ctdb_do_updateip().
 * private_data is a struct ctdb_do_updateip_state.
 * Failure path: -ETIME gets special handling (body not part of this
 * listing); the failed move is logged; the VNN is unassigned and re-bound
 * to state->old with its reference count incremented; the control is
 * answered with status.
 * Success path: when ctdb->do_checkpublicip is set the address is announced
 * via ctdb_announce_vnn_iface() (answering -1 in the adjacent failure
 * path); the control is then answered with 0. */
572 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575 struct ctdb_do_updateip_state *state =
576 talloc_get_type(private_data, struct ctdb_do_updateip_state);
580 if (status == -ETIME) {
583 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
584 ctdb_addr_to_str(&state->vnn->public_address),
586 ctdb_vnn_iface_string(state->vnn)));
589 * All we can do is reset the old interface
590 * and let the next run fix it
592 ctdb_vnn_unassign_iface(ctdb, state->vnn);
593 state->vnn->iface = state->old;
594 state->vnn->iface->references++;
596 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
601 if (ctdb->do_checkpublicip) {
603 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
605 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
612 /* the control succeeded */
613 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_updateip(): clears the VNN
 * update_in_flight flag when the state is freed. The return statement is
 * not part of this listing. */
618 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
620 state->vnn->update_in_flight = false;
625 update (move) an ip address
/* Move the address of vnn from its current interface to a newly chosen one
 * on behalf of control c.
 * Steps visible here:
 *  - a log line for rejecting the request while another update for this
 *    VNN is in flight;
 *  - the current interface is remembered in old, the VNN is unassigned and
 *    then re-assigned through ctdb_vnn_assign_iface();
 *  - if the old and new interface names are equal the control is answered
 *    with 0 straight away;
 *  - otherwise a struct ctdb_do_updateip_state is allocated on vnn, c is
 *    re-parented onto ctdb, update_in_flight is set with
 *    ctdb_updateip_destructor() installed, and CTDB_EVENT_UPDATE_IP is
 *    started with ctdb_do_updateip_callback() as completion handler.
 * Returns int32_t; return statements are not part of this listing.
 * NOTE(review): the error text contains the typo "assin"; it is a runtime
 * string and is left untouched here.
 * NOTE(review): old->name is read without a visible NULL test on old;
 * presumably the caller guarantees vnn->iface is set - confirm. */
627 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
628 struct ctdb_req_control_old *c,
629 struct ctdb_vnn *vnn)
632 struct ctdb_do_updateip_state *state;
633 struct ctdb_interface *old = vnn->iface;
634 const char *new_name;
636 if (vnn->update_in_flight) {
637 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
638 "update for this IP already in flight\n",
639 ctdb_addr_to_str(&vnn->public_address),
640 vnn->public_netmask_bits));
644 ctdb_vnn_unassign_iface(ctdb, vnn);
645 ret = ctdb_vnn_assign_iface(ctdb, vnn);
647 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
648 "assin a usable interface (old iface '%s')\n",
649 ctdb_addr_to_str(&vnn->public_address),
650 vnn->public_netmask_bits,
655 new_name = ctdb_vnn_iface_string(vnn);
656 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
657 /* A benign update from one interface onto itself.
658 * no need to run the eventscripts in this case, just return
661 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
665 state = talloc(vnn, struct ctdb_do_updateip_state);
666 CTDB_NO_MEMORY(ctdb, state);
668 state->c = talloc_steal(ctdb, c);
672 vnn->update_in_flight = true;
673 talloc_set_destructor(state, ctdb_updateip_destructor);
675 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
676 "interface %s to %s\n",
677 ctdb_addr_to_str(&vnn->public_address),
678 vnn->public_netmask_bits,
682 ret = ctdb_event_script_callback(ctdb,
684 ctdb_do_updateip_callback,
686 CTDB_EVENT_UPDATE_IP,
690 ctdb_addr_to_str(&vnn->public_address),
691 vnn->public_netmask_bits);
693 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
694 ctdb_addr_to_str(&vnn->public_address),
695 old->name, new_name));
704 Find the vnn entry for a public ip address
705 returns NULL if the address is not known as a public address
/* Walk ctdb->vnn and compare each entry's public_address with addr using
 * ctdb_same_ip(). The return type is a struct ctdb_vnn pointer; the return
 * statements are not part of this listing. Callers in this file log
 * "not a public address" right after the lookup, presumably for a NULL
 * result - confirm. */
707 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
709 struct ctdb_vnn *vnn;
711 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
712 if (ctdb_same_ip(&vnn->public_address, addr)) {
721 take over an ip address
/* Control handler asking this node to host a public address.
 * indata.dptr is read as a struct ctdb_public_ip.
 * Visible flow:
 *  - the request is logged as an error when pip->pnn is not this node;
 *  - the VNN is looked up with find_public_ip_vnn();
 *  - when IP failover is enabled and do_checkpublicip is set, the kernel is
 *    asked through ctdb_sys_have_ip() whether the address is already
 *    configured, and ctdb_vnn_best_iface() picks a candidate interface;
 *  - a series of consistency checks compare vnn->iface, vnn->pnn and
 *    have_ip and log the unusual combinations;
 *  - when the bound interface differs from the best one, a link-down
 *    interface or a sufficiently large reference-count imbalance is
 *    considered for a move;
 *  - the work is dispatched to ctdb_do_takeip() or ctdb_do_updateip(), or
 *    logged as a redundant takeover.
 * The statements that set do_takeip / do_updateip, the return statements
 * and the remaining parameters are not part of this listing.
 * NOTE(review): two log messages concatenate string literals without a
 * separating space: "failed to find" "a usable interface" and
 * "node %d" "and we are node %d". They are runtime strings and are left
 * untouched here.
 * NOTE(review): vnn->iface->link_up is read after testing only
 * vnn->iface != best_iface; a NULL guard is presumably on a missing line -
 * confirm. */
723 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
724 struct ctdb_req_control_old *c,
729 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
730 struct ctdb_vnn *vnn;
731 bool have_ip = false;
732 bool do_updateip = false;
733 bool do_takeip = false;
734 struct ctdb_interface *best_iface = NULL;
736 if (pip->pnn != ctdb->pnn) {
737 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
738 "with pnn %d, but we're node %d\n",
739 ctdb_addr_to_str(&pip->addr),
740 pip->pnn, ctdb->pnn));
744 /* update our vnn list */
745 vnn = find_public_ip_vnn(ctdb, &pip->addr);
747 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
748 ctdb_addr_to_str(&pip->addr)));
752 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
753 have_ip = ctdb_sys_have_ip(&pip->addr);
755 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
756 if (best_iface == NULL) {
757 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
758 "a usable interface (old %s, have_ip %d)\n",
759 ctdb_addr_to_str(&vnn->public_address),
760 vnn->public_netmask_bits,
761 ctdb_vnn_iface_string(vnn),
766 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
767 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
772 if (vnn->iface == NULL && have_ip) {
773 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
775 ctdb_addr_to_str(&vnn->public_address)));
779 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
780 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
781 "and we have it on iface[%s], but it was assigned to node %d"
782 "and we are node %d, banning ourself\n",
783 ctdb_addr_to_str(&vnn->public_address),
784 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
789 if (vnn->pnn == -1 && have_ip) {
790 vnn->pnn = ctdb->pnn;
791 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
792 "and we already have it on iface[%s], update local daemon\n",
793 ctdb_addr_to_str(&vnn->public_address),
794 ctdb_vnn_iface_string(vnn)));
799 if (vnn->iface != best_iface) {
800 if (!vnn->iface->link_up) {
802 } else if (vnn->iface->references > (best_iface->references + 1)) {
803 /* only move when the rebalance gains something */
811 ctdb_vnn_unassign_iface(ctdb, vnn);
818 ret = ctdb_do_takeip(ctdb, c, vnn);
822 } else if (do_updateip) {
823 ret = ctdb_do_updateip(ctdb, c, vnn);
829 * The interface is up and the kernel known the ip
832 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833 ctdb_addr_to_str(&pip->addr),
834 vnn->public_netmask_bits,
835 ctdb_vnn_iface_string(vnn)));
839 /* tell ctdb_control.c that we will be replying asynchronously */
/* Remove vnn from the daemon: unlink it from ctdb->vnn, drop its interface
 * binding, then let ctdb_remove_orphaned_ifaces() discard interfaces named
 * by this VNN that no remaining VNN uses. Any statements after these (for
 * example freeing vnn) are not part of this listing. */
845 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
847 DLIST_REMOVE(ctdb->vnn, vnn);
848 ctdb_vnn_unassign_iface(ctdb, vnn);
849 ctdb_remove_orphaned_ifaces(ctdb, vnn);
854 called when releaseip event finishes
/* Completion handler for the event started by ctdb_control_release_ip().
 * private_data is a struct takeover_callback_state.
 * Visible flow:
 *  - a status of -ETIME gets special handling (body not part of this
 *    listing);
 *  - when IP failover is enabled and do_checkpublicip is set, an address
 *    that ctdb_sys_have_ip() still reports as present makes the release
 *    fail: it is logged and the control is answered (arguments not shown);
 *  - a CTDB_SRVID_RELEASE_IP message carrying the address string
 *    (duplicated onto state) is sent to this node;
 *  - the VNN is unassigned from its interface and, when marked
 *    delete_pending, removed through do_delete_ip();
 *  - the control is answered with 0. */
856 static void release_ip_callback(struct ctdb_context *ctdb, int status,
859 struct takeover_callback_state *state =
860 talloc_get_type(private_data, struct takeover_callback_state);
863 if (status == -ETIME) {
867 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
868 if (ctdb_sys_have_ip(state->addr)) {
870 ("IP %s still hosted during release IP callback, failing\n",
871 ctdb_addr_to_str(state->addr)));
872 ctdb_request_control_reply(ctdb, state->c,
879 /* send a message to all clients of this node telling them
880 that the cluster has been reconfigured and they should
881 release any sockets on this IP */
882 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
883 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
884 data.dsize = strlen((char *)data.dptr)+1;
886 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
888 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
890 ctdb_vnn_unassign_iface(ctdb, state->vnn);
892 /* Process the IP if it has been marked for deletion */
893 if (state->vnn->delete_pending) {
894 do_delete_ip(ctdb, state->vnn);
898 /* the control succeeded */
899 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_control_release_ip(): clears the VNN
 * update_in_flight flag when the state is freed. Unlike the takeip and
 * updateip destructors it tolerates state->vnn == NULL. The return
 * statement is not part of this listing. */
903 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
905 if (state->vnn != NULL) {
906 state->vnn->update_in_flight = false;
912 release an ip address
/* Control handler asking this node to stop hosting a public address.
 * indata.dptr is read as a struct ctdb_public_ip.
 * Visible flow:
 *  - the VNN is looked up with find_public_ip_vnn();
 *  - vnn->takeover_ctx is freed and reset, which cancels anything hung off
 *    it (per the in-code comment: previous arps);
 *  - when IP failover is enabled and do_checkpublicip is set, a release
 *    for an address the kernel does not hold is logged as redundant and
 *    the VNN is unassigned; a VNN with no interface is likewise logged as
 *    redundant;
 *  - a log line for rejecting the request while another update for this
 *    VNN is in flight;
 *  - a struct takeover_callback_state is allocated on ctdb, c is
 *    re-parented onto it, the address is copied, update_in_flight is set
 *    with ctdb_releaseip_destructor() installed, and CTDB_EVENT_RELEASE_IP
 *    is started with release_ip_callback() as completion handler;
 *  - *async_reply is set to true once the event has been started.
 * Return statements and the remaining parameters are not part of this
 * listing.
 * NOTE(review): the strdup() result stored in iface is neither checked
 * nor freed in the visible lines - confirm both happen on missing lines. */
914 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
915 struct ctdb_req_control_old *c,
920 struct takeover_callback_state *state;
921 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
922 struct ctdb_vnn *vnn;
925 /* update our vnn list */
926 vnn = find_public_ip_vnn(ctdb, &pip->addr);
928 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
929 ctdb_addr_to_str(&pip->addr)));
934 /* stop any previous arps */
935 talloc_free(vnn->takeover_ctx);
936 vnn->takeover_ctx = NULL;
938 /* Some ctdb tool commands (e.g. moveip) send
939 * lazy multicast to drop an IP from any node that isn't the
940 * intended new node. The following makes ctdbd ignore
941 * a release for any address it doesn't host.
943 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
944 if (!ctdb_sys_have_ip(&pip->addr)) {
945 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
946 ctdb_addr_to_str(&pip->addr),
947 vnn->public_netmask_bits,
948 ctdb_vnn_iface_string(vnn)));
949 ctdb_vnn_unassign_iface(ctdb, vnn);
953 if (vnn->iface == NULL) {
954 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
955 ctdb_addr_to_str(&pip->addr),
956 vnn->public_netmask_bits));
961 /* There is a potential race between take_ip and us because we
962 * update the VNN via a callback that runs when the
963 * eventscripts have been run. Avoid the race by allowing one
964 * update to be in flight at a time.
966 if (vnn->update_in_flight) {
967 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
968 "update for this IP already in flight\n",
969 ctdb_addr_to_str(&vnn->public_address),
970 vnn->public_netmask_bits));
974 iface = strdup(ctdb_vnn_iface_string(vnn));
976 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
977 ctdb_addr_to_str(&pip->addr),
978 vnn->public_netmask_bits,
982 state = talloc(ctdb, struct takeover_callback_state);
984 ctdb_set_error(ctdb, "Out of memory at %s:%d",
990 state->c = talloc_steal(state, c);
991 state->addr = talloc(state, ctdb_sock_addr);
992 if (state->addr == NULL) {
993 ctdb_set_error(ctdb, "Out of memory at %s:%d",
999 *state->addr = pip->addr;
1002 vnn->update_in_flight = true;
1003 talloc_set_destructor(state, ctdb_releaseip_destructor);
1005 ret = ctdb_event_script_callback(ctdb,
1006 state, release_ip_callback, state,
1007 CTDB_EVENT_RELEASE_IP,
1010 ctdb_addr_to_str(&pip->addr),
1011 vnn->public_netmask_bits);
1014 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1015 ctdb_addr_to_str(&pip->addr),
1016 ctdb_vnn_iface_string(vnn)));
1021 /* tell the control that we will be replying asynchronously */
1022 *async_reply = true;
/* Add one public address, with its netmask width and a comma-separated
 * list of candidate interfaces, to ctdb->vnn.
 * Visible flow:
 *  - every interface name in ifaces is checked with
 *    ctdb_sys_check_iface_exists(); a missing one is logged at CRIT;
 *  - an address already present in ctdb->vnn (ctdb_same_sockaddr) is
 *    logged at CRIT as a duplicate;
 *  - a zeroed struct ctdb_vnn is allocated on ctdb and its NULL-terminated
 *    ifaces array is filled with talloc copies of the names;
 *  - when check_address is set and ctdb_sys_have_ip() reports the address
 *    as present, vnn->pnn is set to this node;
 *  - each name is registered through ctdb_add_local_iface();
 *  - the VNN is linked into ctdb->vnn.
 * Returns int; ctdb_set_public_addresses() treats non-zero as failure.
 * Return statements and counter updates are not part of this listing.
 * NOTE(review): the strdup(ifaces) result is passed to strtok() on the very
 * next numbered line with no NULL check, and no matching free is visible.
 * NOTE(review): the initial talloc_array() result is not checked before
 * the following statement; only the talloc_realloc() inside the loop is
 * checked.
 * NOTE(review): strtok() keeps hidden static state; confirm no callee in
 * the loop bodies uses strtok() as well. */
1026 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1027 ctdb_sock_addr *addr,
1028 unsigned mask, const char *ifaces,
1031 struct ctdb_vnn *vnn;
1038 tmp = strdup(ifaces);
1039 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1040 if (!ctdb_sys_check_iface_exists(iface)) {
1041 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1048 /* Verify that we don't have an entry for this ip yet */
1049 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1050 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1051 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1052 ctdb_addr_to_str(addr)));
1057 /* create a new vnn structure for this ip address */
1058 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1059 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1060 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1061 tmp = talloc_strdup(vnn, ifaces);
1062 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1063 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1064 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1065 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1066 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1067 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1071 vnn->ifaces[num] = NULL;
1072 vnn->public_address = *addr;
1073 vnn->public_netmask_bits = mask;
1075 if (check_address) {
1076 if (ctdb_sys_have_ip(addr)) {
1077 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1078 vnn->pnn = ctdb->pnn;
1082 for (i=0; vnn->ifaces[i]; i++) {
1083 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1085 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1086 "for public_address[%s]\n",
1087 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1093 DLIST_ADD(ctdb->vnn, vnn);
1099 setup the public address lists from a file
/* Load ctdb->public_addresses_file and add every address it lists.
 * Visible flow:
 *  - the file is read into an array of lines with file_lines_load();
 *    failure sets the ctdb error string;
 *  - trailing empty lines are trimmed from the count;
 *  - per line: leading spaces and tabs are skipped, empty lines are
 *    detected, the line is split on spaces/tabs with strtok() (first token
 *    presumably the address string, second the interface list - the
 *    assignments are not part of this listing), and
 *    ctdb->default_public_interface is used when no interface is given,
 *    with a CRIT log when there is no default either;
 *  - the address is parsed with parse_ip_mask() and added through
 *    ctdb_add_public_address(), passing check_addresses along; both
 *    failures are logged at CRIT with the 1-based line number.
 * Returns int; return statements are not part of this listing. */
1101 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1107 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1108 if (lines == NULL) {
1109 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1112 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1116 for (i=0;i<nlines;i++) {
1118 ctdb_sock_addr addr;
1119 const char *addrstr;
1124 while ((*line == ' ') || (*line == '\t')) {
1130 if (strcmp(line, "") == 0) {
1133 tok = strtok(line, " \t");
1135 tok = strtok(NULL, " \t");
1137 if (NULL == ctdb->default_public_interface) {
1138 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1143 ifaces = ctdb->default_public_interface;
1148 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1149 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1153 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1154 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Collect the public IP lists of the nodes in nodemap into a zeroed array
 * of struct ctdb_public_ip_list allocated on mem_ctx and indexed by node
 * position j.
 * Nodes flagged NODE_FLAGS_INACTIVE are tested first (branch body not
 * shown, presumably skipped). For the others
 * ctdb_ctrl_get_public_ips_flags() is called with TAKEOVER_TIMEOUT() and
 * public_ip_flags; its result is copied into public_ips[j] (count plus a
 * memcpy of the entries into an array allocated on public_ips) and the
 * fetched list is freed. A fetch failure or allocation failure frees
 * public_ips; the return statements are not part of this listing. */
1165 static struct ctdb_public_ip_list *
1166 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1167 TALLOC_CTX *mem_ctx,
1168 struct ctdb_node_map_old *nodemap,
1169 uint32_t public_ip_flags)
1172 struct ctdb_public_ip_list_old *ip_list;
1173 struct ctdb_public_ip_list *public_ips;
1175 public_ips = talloc_zero_array(mem_ctx,
1176 struct ctdb_public_ip_list,
1178 if (public_ips == NULL) {
1179 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1183 for (j = 0; j < nodemap->num; j++) {
1184 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1188 /* Retrieve the list of public IPs from the
1189 * node. Flags says whether it is known or
1191 ret = ctdb_ctrl_get_public_ips_flags(
1192 ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1193 public_ip_flags, &ip_list);
1196 ("Failed to read public IPs from node: %u\n", j));
1197 talloc_free(public_ips);
1200 public_ips[j].num = ip_list->num;
1201 if (ip_list->num == 0) {
1202 talloc_free(ip_list);
1205 public_ips[j].ip = talloc_zero_array(public_ips,
1206 struct ctdb_public_ip,
1208 if (public_ips[j].ip == NULL) {
1209 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1210 talloc_free(public_ips);
1213 memcpy(public_ips[j].ip, &ip_list->ips[0],
1214 sizeof(struct ctdb_public_ip) * ip_list->num);
1215 talloc_free(ip_list);
/* Shared state for get_tunable_callback() and get_tunable_fail_callback().
 * Those functions and get_tunable_from_nodes() also use the members out
 * and fatal, whose declarations are not part of this listing. */
1221 struct get_tunable_callback_data {
/* name of the tunable being fetched; used in log messages */
1222 const char *tunable;
/* Per-node success callback for the GET_TUNABLE control issued by
 * get_tunable_from_nodes(). callback is a struct
 * get_tunable_callback_data.
 * The reply must be exactly sizeof(uint32_t) bytes, otherwise it is logged
 * as an error. The node number is validated against the length of the
 * cd->out array (talloc_array_length) before the value is stored at
 * cd->out[pnn]. The conditions and early returns are not part of this
 * listing. */
1227 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1228 int32_t res, TDB_DATA outdata,
1231 struct get_tunable_callback_data *cd =
1232 (struct get_tunable_callback_data *)callback;
1236 /* Already handled in fail callback */
1240 if (outdata.dsize != sizeof(uint32_t)) {
1241 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1242 cd->tunable, pnn, (int)sizeof(uint32_t),
1243 (int)outdata.dsize));
1248 size = talloc_array_length(cd->out);
1250 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1251 cd->tunable, pnn, size));
1256 cd->out[pnn] = *(uint32_t *)outdata.dptr;
/* Per-node failure callback for the GET_TUNABLE control issued by
 * get_tunable_from_nodes(). callback is a struct
 * get_tunable_callback_data.
 * Three outcomes are distinguished by their log messages: a timeout, a
 * tunable the remote node does not implement (logged at WARNING), and any
 * other error. The selecting conditions and any update of cd->fatal are
 * not part of this listing. */
1259 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1260 int32_t res, TDB_DATA outdata,
1263 struct get_tunable_callback_data *cd =
1264 (struct get_tunable_callback_data *)callback;
1269 ("Timed out getting tunable \"%s\" from node %d\n",
1275 DEBUG(DEBUG_WARNING,
1276 ("Tunable \"%s\" not implemented on node %d\n",
1281 ("Unexpected error getting tunable \"%s\" from node %d\n",
/* Fetch one tunable from every connected node.
 * Allocates an array of nodemap->num values on tmp_ctx, pre-filled with
 * default_value, builds a struct ctdb_control_get_tunable request holding
 * the tunable name including its terminator, and sends
 * CTDB_CONTROL_GET_TUNABLE to the nodes returned by
 * list_of_connected_nodes() with TAKEOVER_TIMEOUT(). Replies are stored by
 * get_tunable_callback(); failures are reported by
 * get_tunable_fail_callback(). When the async control fails,
 * callback_data.fatal is consulted (branch body not shown). The request
 * buffer is freed before returning. Return statements are not part of
 * this listing; set_ipflags() treats a NULL result as failure.
 * NOTE(review): the talloc_size() result is dereferenced on the following
 * numbered lines with no NULL check. */
1287 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1288 TALLOC_CTX *tmp_ctx,
1289 struct ctdb_node_map_old *nodemap,
1290 const char *tunable,
1291 uint32_t default_value)
1294 struct ctdb_control_get_tunable *t;
1297 struct get_tunable_callback_data callback_data;
1300 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1301 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1302 for (i=0; i<nodemap->num; i++) {
1303 tvals[i] = default_value;
1306 callback_data.out = tvals;
1307 callback_data.tunable = tunable;
1308 callback_data.fatal = false;
1310 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1311 data.dptr = talloc_size(tmp_ctx, data.dsize);
1312 t = (struct ctdb_control_get_tunable *)data.dptr;
1313 t->length = strlen(tunable)+1;
1314 memcpy(t->name, tunable, t->length);
1315 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1316 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1317 nodes, 0, TAKEOVER_TIMEOUT(),
1319 get_tunable_callback,
1320 get_tunable_fail_callback,
1321 &callback_data) != 0) {
1322 if (callback_data.fatal) {
1328 talloc_free(data.dptr);
/* Convert a struct ctdb_node_map_old into a newly allocated struct
 * ctdb_node_map on mem_ctx: the node count is copied and the node entries
 * are copied with memcpy into an array allocated as a child of the new
 * map. Return statements are not part of this listing; set_ipflags()
 * stores the result.
 * NOTE(review): the talloc_zero_array() result is handed to memcpy() on
 * the very next numbered line with no NULL check, so an allocation failure
 * with new->num > 0 would write through NULL.
 * NOTE(review): the memcpy assumes the element layout of old->nodes
 * matches struct ctdb_node_and_flags - confirm. */
1333 static struct ctdb_node_map *
1334 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1335 const struct ctdb_node_map_old *old)
1337 struct ctdb_node_map *new;
1339 new = talloc(mem_ctx, struct ctdb_node_map);
1341 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1344 new->num = old->num;
1345 new->node = talloc_zero_array(new,
1346 struct ctdb_node_and_flags, new->num);
1347 memcpy(new->node, &old->nodes[0],
1348 sizeof(struct ctdb_node_and_flags) * new->num);
/* Feed node flags and two per-node tunables into ipalloc_state.
 * Two tunables are fetched from all connected nodes with
 * get_tunable_from_nodes(), allocating on ipalloc_state; the second is
 * "NoIPHostOnAllDisabled" with default 0 (the name and default of the
 * first are not part of this listing). nodemap is converted with
 * ctdb_node_map_old_to_new(), everything is passed to
 * ipalloc_set_node_flags(), and the two value arrays are freed.
 * Returns bool; return statements are not part of this listing. */
1354 static bool set_ipflags(struct ctdb_context *ctdb,
1355 struct ipalloc_state *ipalloc_state,
1356 struct ctdb_node_map_old *nodemap)
1358 uint32_t *tval_noiptakeover;
1359 uint32_t *tval_noiphostonalldisabled;
1360 struct ctdb_node_map *new;
1362 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1364 if (tval_noiptakeover == NULL) {
1368 tval_noiphostonalldisabled =
1369 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1370 "NoIPHostOnAllDisabled", 0);
1371 if (tval_noiphostonalldisabled == NULL) {
1372 /* Caller frees tmp_ctx */
1376 new = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
1381 ipalloc_set_node_flags(ipalloc_state, new,
1383 tval_noiphostonalldisabled);
1385 talloc_free(tval_noiptakeover);
1386 talloc_free(tval_noiphostonalldisabled);
/* Map the IP allocation tunables to an algorithm identifier.
 * lcp2_public_ip_assignment == 1 takes precedence and selects
 * IPALLOC_LCP2; otherwise deterministic_public_ips == 1 selects
 * IPALLOC_DETERMINISTIC; otherwise IPALLOC_NONDETERMINISTIC.
 * Only the exact value 1 enables a tunable here. */
1392 static enum ipalloc_algorithm
1393 determine_algorithm(const struct ctdb_tunable_list *tunables)
1395 if (1 == tunables->lcp2_public_ip_assignment) {
1396 return IPALLOC_LCP2;
1397 } else if (1 == tunables->deterministic_public_ips) {
1398 return IPALLOC_DETERMINISTIC;
1400 return IPALLOC_NONDETERMINISTIC;
/* Failure bookkeeping for a takeover run. The functions using it also
 * read the member num_nodes, whose declaration is not part of this
 * listing. */
1404 struct takeover_callback_data {
/* per-node failure counters, indexed by PNN, num_nodes entries */
1406 unsigned int *fail_count;
/* Allocate a zeroed struct takeover_callback_data on mem_ctx together with
 * a zeroed fail_count array of num_nodes counters (child of the struct),
 * and record num_nodes. Both allocation failures are logged; the second
 * also frees the struct. Returns the new struct; the failure returns are
 * not part of this listing.
 * NOTE(review): the local pointer is declared static, so it keeps its
 * value between calls although it is reassigned on every call; this looks
 * unintentional - confirm. */
1409 static struct takeover_callback_data *
1410 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1413 static struct takeover_callback_data *takeover_data;
1415 takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1416 if (takeover_data == NULL) {
1417 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1421 takeover_data->fail_count = talloc_zero_array(takeover_data,
1422 unsigned int, num_nodes);
1423 if (takeover_data->fail_count == NULL) {
1424 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1425 talloc_free(takeover_data);
1429 takeover_data->num_nodes = num_nodes;
1431 return takeover_data;
/* Per-node failure callback for takeover-run controls. callback_data must
 * be a struct takeover_callback_data (talloc_get_type_abort aborts
 * otherwise).
 * A node_pnn outside [0, num_nodes) is logged as invalid. The failure is
 * logged only the first time a node fails (counter still 0); the node
 * counter is then incremented. res and outdata are not referenced in the
 * visible lines. */
1434 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1435 uint32_t node_pnn, int32_t res,
1436 TDB_DATA outdata, void *callback_data)
1438 struct takeover_callback_data *cd =
1439 talloc_get_type_abort(callback_data,
1440 struct takeover_callback_data);
1442 if (node_pnn >= cd->num_nodes) {
1443 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1447 if (cd->fail_count[node_pnn] == 0) {
1449 ("Node %u failed the takeover run\n", node_pnn));
1452 cd->fail_count[node_pnn]++;
/* After a takeover run, find the node with the highest failure count and,
 * when that count is non-zero, broadcast its PNN (as a uint32_t payload)
 * to all connected nodes with ctdb_client_send_message(); the log text
 * describes this as sending banning credits. A failed send is logged.
 * Because the comparison is strict, the first node reaching the maximum is
 * presumably the one reported (the assignment of max_pnn is not part of
 * this listing). */
1455 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1456 struct takeover_callback_data *tcd)
1458 unsigned int max_fails = 0;
/* -1 converts to the largest uint32_t value; used as a placeholder until a
 * failing node is found */
1459 uint32_t max_pnn = -1;
1462 for (i = 0; i < tcd->num_nodes; i++) {
1463 if (tcd->fail_count[i] > max_fails) {
1465 max_fails = tcd->fail_count[i];
1469 if (max_fails > 0) {
1474 ("Sending banning credits to %u with fail count %u\n",
1475 max_pnn, max_fails));
1477 data.dptr = (uint8_t *)&max_pnn;
1478 data.dsize = sizeof(uint32_t);
1479 ret = ctdb_client_send_message(ctdb,
1480 CTDB_BROADCAST_CONNECTED,
1485 ("Failed to set banning credits for node %u\n",
1492 * Recalculate the allocation of public IPs to nodes and have the
1493 * nodes host their allocated addresses.
1495 * - Initialise IP allocation state. Pass:
1496 + algorithm to be used;
1497 + whether IP rebalancing ("failback") should be done (this uses a
1498 cluster-wide configuration variable and only the value from the
1499 master node is used); and
1500 * + list of nodes to force rebalance (internal structure, currently
1501 * no way to fetch, only used by LCP2 for nodes that have had new
1502 * IP addresses added).
1503 * - Set IP flags for IP allocation based on node map and tunables
1504 * NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
1505 * (tunable fetching done separately so values can be faked in unit
1507 * - Retrieve known and available IP addresses (done separately so
1508 * values can be faked in unit testing)
1509 * - Use ipalloc_set_public_ips() to set known and available IP
1510 addresses for allocation
1511 * - If cluster can't host IP addresses then early exit
1512 * - Run IP allocation algorithm
1513 * - Send RELEASE_IP to all nodes for IPs they should not host
1514 * - Send TAKE_IP to all nodes for IPs they should host
1515 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/*
 * Entry point of a takeover run: recalculate which node hosts each public
 * IP and push the result to the cluster.
 *
 * Stages visible below, in order:
 *  - allocate the fail-callback data on tmp_ctx;
 *  - compute one cumulative timeout of 3 x TakeoverTimeout;
 *  - build the ipalloc state (algorithm, failback setting, forced
 *    rebalance nodes), set the IP flags, fetch the known and the available
 *    public IPs and hand both lists to ipalloc_set_public_ips();
 *  - run ipalloc() to obtain the new assignment;
 *  - send RELEASE_IP to every connected node for each IP it should not
 *    host, then wait for the replies;
 *  - send TAKEOVER_IP to the node chosen for each assigned IP, then wait;
 *  - send IPREALLOCATED to the connected nodes.
 * takeover_run_fail_callback() counts failures per node and
 * takeover_run_process_failures() acts on those counts.  tmp_ctx parents
 * all temporary allocations and is freed on the exit paths shown.
 */
1517 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1518 uint32_t *force_rebalance_nodes)
1521 struct ctdb_public_ip ip;
1523 struct public_ip_list *all_ips, *tmp_ip;
1525 struct timeval timeout;
1526 struct client_async_data *async_data;
1527 struct ctdb_client_control_state *state;
1528 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1529 struct ipalloc_state *ipalloc_state;
1530 struct ctdb_public_ip_list *known_ips, *available_ips;
1531 struct takeover_callback_data *takeover_data;
1533 /* Initialise fail callback data to be used with
1534 * takeover_run_fail_callback(). A failure in any of the
1535 * following steps will cause an early return, so this can be
1536 * reused for each of those steps without re-initialising. */
1537 takeover_data = takeover_callback_data_init(tmp_ctx,
1539 if (takeover_data == NULL) {
1540 talloc_free(tmp_ctx);
1544 /* Each of the later stages (RELEASE_IP, TAKEOVER_IP,
1545 * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1546 * seconds. However, RELEASE_IP can take longer due to TCP
1547 * connection killing, so sometimes needs more time.
1548 * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1549 * seconds across all 3 stages. No explicit expiry checks are
1550 * needed before each stage because tevent is smart enough to
1551 * fire the timeouts even if they are in the past. Initialise
1552 * this here to cope with early jumps to IPREALLOCATED. */
1553 timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0);
1556 * ip failover is completely disabled, just send out the
1557 * ipreallocated event.
1559 if (ctdb->tunable.disable_ip_failover != 0) {
1563 ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1564 determine_algorithm(&ctdb->tunable),
1565 (ctdb->tunable.no_ip_failback != 0),
1566 force_rebalance_nodes);
1567 if (ipalloc_state == NULL) {
1568 talloc_free(tmp_ctx);
1572 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1574 ("Failed to set IP flags - aborting takeover run\n"));
1575 talloc_free(tmp_ctx);
1579 /* Fetch known/available public IPs from each active node */
1580 /* Fetch lists of known public IPs from all nodes */
1581 known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1583 if (known_ips == NULL) {
1584 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1585 talloc_free(tmp_ctx);
1588 available_ips = ctdb_fetch_remote_public_ips(
1589 ctdb, ipalloc_state, nodemap,
1590 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1591 if (available_ips == NULL) {
1592 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1593 talloc_free(tmp_ctx);
1597 if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1598 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1599 talloc_free(tmp_ctx);
1603 if (! ipalloc_can_host_ips(ipalloc_state)) {
1604 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1608 /* Do the IP reassignment calculations */
1609 all_ips = ipalloc(ipalloc_state);
1610 if (all_ips == NULL) {
1611 talloc_free(tmp_ctx);
1615 /* Now tell all nodes to release any public IPs should not
1616 * host. This will be a NOOP on nodes that don't currently
1617 * hold the given IP.
1619 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1620 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1622 async_data->fail_callback = takeover_run_fail_callback;
1623 async_data->callback_data = takeover_data;
1625 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1627 /* Send a RELEASE_IP to all nodes that should not be hosting
1628 * each IP. For each IP, all but one of these will be
1629 * redundant. However, the redundant ones are used to tell
1630 * nodes which node should be hosting the IP so that commands
1631 * like "ctdb ip" can display a particular nodes idea of who
1632 * is hosting what. */
1633 for (i=0;i<nodemap->num;i++) {
1634 /* don't talk to unconnected nodes, but do talk to banned nodes */
1635 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1639 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1640 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1641 /* This node should be serving this
1642 vnn so don't tell it to release the ip
1646 ip.pnn = tmp_ip->pnn;
1647 ip.addr = tmp_ip->addr;
1649 data.dsize = sizeof(ip);
1650 data.dptr = (uint8_t *)&ip;
1651 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1652 0, CTDB_CONTROL_RELEASE_IP, 0,
1655 if (state == NULL) {
1656 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1657 talloc_free(tmp_ctx);
1661 ctdb_client_async_add(async_data, state);
1664 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1666 ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1669 talloc_free(async_data);
1672 /* For each IP, send a TAKEOVER_IP to the node that should be
1673 * hosting it. Many of these will often be redundant (since
1674 * the allocation won't have changed) but they can be useful
1675 * to recover from inconsistencies. */
1676 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1677 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1679 async_data->fail_callback = takeover_run_fail_callback;
1680 async_data->callback_data = takeover_data;
1682 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1683 if (tmp_ip->pnn == -1) {
1684 /* this IP won't be taken over */
1688 ip.pnn = tmp_ip->pnn;
1689 ip.addr = tmp_ip->addr;
1691 data.dsize = sizeof(ip);
1692 data.dptr = (uint8_t *)&ip;
1693 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1694 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1695 data, async_data, &timeout, NULL);
1696 if (state == NULL) {
1697 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1698 talloc_free(tmp_ctx);
1702 ctdb_client_async_add(async_data, state);
1704 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1706 ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1712 * Tell all nodes to run eventscripts to process the
1713 * "ipreallocated" event. This can do a lot of things,
1714 * including restarting services to reconfigure them if public
1715 * IPs have moved. Once upon a time this event only used to
1718 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1719 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1722 NULL, takeover_run_fail_callback,
1726 ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1730 talloc_free(tmp_ctx);
1734 takeover_run_process_failures(ctdb, takeover_data);
1735 talloc_free(tmp_ctx);
1741 destroy a ctdb_client_ip structure
/*
 * talloc destructor for a struct ctdb_client_ip.  Logs the client address
 * and port at debug level and unlinks the entry from
 * ctdb->client_ip_list.
 */
1743 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1745 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1746 ctdb_addr_to_str(&ip->addr),
1747 ntohs(ip->addr.ip.sin_port),
1750 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1755 called by a client to inform us of a TCP connection that it is managing
1756 that should be tickled with an ACK when IP takeover is done
/*
 * Control handler: a local client registers a TCP connection so that it
 * can be tickled after an IP takeover.  indata.dptr is read as a
 * struct ctdb_connection.
 *
 * Steps visible below:
 *  - nothing is done when this node has no public IPs;
 *  - both endpoints are canonicalised in place;
 *  - the destination is looked up among the public addresses, and an
 *    address without a match is reported per address family;
 *  - registration is refused when the address is not hosted by this node
 *    (vnn->pnn != ctdb->pnn);
 *  - a ctdb_client_ip entry and a ctdb_tcp_list entry are allocated as
 *    talloc children of the client and linked into ctdb->client_ip_list
 *    and client->tcp_list;
 *  - CTDB_CONTROL_TCP_ADD is broadcast to the connected nodes with
 *    CTDB_CTRL_FLAG_NOREPLY.
 *
 * NOTE(review): client is the result of reqid_find() and is used as a
 * talloc parent and for client->pid without a NULL check in the lines
 * shown - confirm that callers always pass a valid client_id.
 * NOTE(review): indata.dsize is not compared with
 * sizeof(struct ctdb_connection) in the lines shown - confirm.
 */
1758 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1761 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1762 struct ctdb_connection *tcp_sock = NULL;
1763 struct ctdb_tcp_list *tcp;
1764 struct ctdb_connection t;
1767 struct ctdb_client_ip *ip;
1768 struct ctdb_vnn *vnn;
1769 ctdb_sock_addr addr;
1771 /* If we don't have public IPs, tickles are useless */
1772 if (ctdb->vnn == NULL) {
1776 tcp_sock = (struct ctdb_connection *)indata.dptr;
1778 addr = tcp_sock->src;
1779 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1780 addr = tcp_sock->dst;
1781 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1784 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1785 vnn = find_public_ip_vnn(ctdb, &addr);
1787 switch (addr.sa.sa_family) {
1789 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1790 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1791 ctdb_addr_to_str(&addr)));
1795 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1796 ctdb_addr_to_str(&addr)));
1799 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1805 if (vnn->pnn != ctdb->pnn) {
1806 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1807 ctdb_addr_to_str(&addr),
1808 client_id, client->pid));
1809 /* failing this call will tell smbd to die */
1813 ip = talloc(client, struct ctdb_client_ip);
1814 CTDB_NO_MEMORY(ctdb, ip);
1818 ip->client_id = client_id;
1819 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1820 DLIST_ADD(ctdb->client_ip_list, ip);
1822 tcp = talloc(client, struct ctdb_tcp_list);
1823 CTDB_NO_MEMORY(ctdb, tcp);
1825 tcp->connection.src = tcp_sock->src;
1826 tcp->connection.dst = tcp_sock->dst;
1828 DLIST_ADD(client->tcp_list, tcp);
1830 t.src = tcp_sock->src;
1831 t.dst = tcp_sock->dst;
1833 data.dptr = (uint8_t *)&t;
1834 data.dsize = sizeof(t);
1836 switch (addr.sa.sa_family) {
1838 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1839 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1840 ctdb_addr_to_str(&tcp_sock->src),
1841 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1844 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1845 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1846 ctdb_addr_to_str(&tcp_sock->src),
1847 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1850 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1854 /* tell all nodes about this tcp connection */
1855 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1856 CTDB_CONTROL_TCP_ADD,
1857 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1859 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1867 find a tcp address on a list
1869 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1870 struct ctdb_connection *tcp)
1874 if (array == NULL) {
1878 for (i=0;i<array->num;i++) {
1879 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1880 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1881 return &array->connections[i];
1890 called by a daemon to inform us of a TCP connection that one of its
1891 clients is managing and that should be tickled with an ACK when IP takeover is
1894 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1896 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1897 struct ctdb_tcp_array *tcparray;
1898 struct ctdb_connection tcp;
1899 struct ctdb_vnn *vnn;
1901 /* If we don't have public IPs, tickles are useless */
1902 if (ctdb->vnn == NULL) {
1906 vnn = find_public_ip_vnn(ctdb, &p->dst);
1908 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1909 ctdb_addr_to_str(&p->dst)));
1915 tcparray = vnn->tcp_array;
1917 /* If this is the first tickle */
1918 if (tcparray == NULL) {
1919 tcparray = talloc(vnn, struct ctdb_tcp_array);
1920 CTDB_NO_MEMORY(ctdb, tcparray);
1921 vnn->tcp_array = tcparray;
1924 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1925 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1927 tcparray->connections[tcparray->num].src = p->src;
1928 tcparray->connections[tcparray->num].dst = p->dst;
1931 if (tcp_update_needed) {
1932 vnn->tcp_update_needed = true;
1938 /* Do we already have this tickle ?*/
1941 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1942 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1943 ctdb_addr_to_str(&tcp.dst),
1944 ntohs(tcp.dst.ip.sin_port),
1949 /* A new tickle, we must add it to the array */
1950 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1951 struct ctdb_connection,
1953 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1955 tcparray->connections[tcparray->num].src = p->src;
1956 tcparray->connections[tcparray->num].dst = p->dst;
1959 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1960 ctdb_addr_to_str(&tcp.dst),
1961 ntohs(tcp.dst.ip.sin_port),
1964 if (tcp_update_needed) {
1965 vnn->tcp_update_needed = true;
/*
 * Remove one connection from the tickle array of a public address.
 * A missing array or an unknown connection is only logged.  Otherwise the
 * last array entry is copied over the matching slot, the count is
 * decremented, the array is freed once it becomes empty, and the vnn is
 * flagged as needing a tickle update.
 */
1972 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1974 struct ctdb_connection *tcpp;
1980 /* if the array is empty we cant remove it
1981 and we don't need to do anything
1983 if (vnn->tcp_array == NULL) {
1984 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1985 ctdb_addr_to_str(&conn->dst),
1986 ntohs(conn->dst.ip.sin_port)));
1991 /* See if we know this connection
1992 if we don't know this connection then we dont need to do anything
1994 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1996 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1997 ctdb_addr_to_str(&conn->dst),
1998 ntohs(conn->dst.ip.sin_port)));
2003 /* We need to remove this entry from the array.
2004 Instead of allocating a new array and copying data to it
2005 we cheat and just copy the last entry in the existing array
2006 to the entry that is to be removed and just shring the
2009 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2010 vnn->tcp_array->num--;
2012 /* If we deleted the last entry we also need to remove the entire array
2014 if (vnn->tcp_array->num == 0) {
2015 talloc_free(vnn->tcp_array);
2016 vnn->tcp_array = NULL;
2019 vnn->tcp_update_needed = true;
2021 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2022 ctdb_addr_to_str(&conn->src),
2023 ntohs(conn->src.ip.sin_port)));
2028 called by a daemon to inform us of a TCP connection that one of its
2029 clients used is no longer needed in the tickle database
/*
 * Control handler: drop a connection from the tickle list of the public
 * address matching its destination.  indata.dptr is read as a
 * struct ctdb_connection.  Nothing is done when this node has no public
 * IPs, an unknown address is logged, and the removal itself is done by
 * ctdb_remove_connection().
 */
2031 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2033 struct ctdb_vnn *vnn;
2034 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2036 /* If we don't have public IPs, tickles are useless */
2037 if (ctdb->vnn == NULL) {
2041 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2044 (__location__ " unable to find public address %s\n",
2045 ctdb_addr_to_str(&conn->dst)));
2049 ctdb_remove_connection(vnn, conn);
2056 Called when another daemon starts - causes all tickles for all
2057 public addresses we are serving to be sent to the new node on the
2058 next check. This actually causes the next scheduled call to
2059 ctdb_update_tcp_tickles() to update all nodes. This is simple and
2060 doesn't require careful error handling.
2062 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2064 struct ctdb_vnn *vnn;
2066 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2067 (unsigned long) pnn));
2069 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2070 vnn->tcp_update_needed = true;
2078 called when a client structure goes away - hook to remove
2079 elements from the tcp_list in all daemons
/*
 * Hook run when a client structure goes away.  Each entry is unlinked
 * from client->tcp_list in turn.  Its connection is removed from the
 * tickle array only when the public address of the destination is
 * currently hosted by this node; an address that cannot be found is
 * logged.
 */
2081 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2083 while (client->tcp_list) {
2084 struct ctdb_vnn *vnn;
2085 struct ctdb_tcp_list *tcp = client->tcp_list;
2086 struct ctdb_connection *conn = &tcp->connection;
2088 DLIST_REMOVE(client->tcp_list, tcp);
2090 vnn = find_public_ip_vnn(client->ctdb,
2094 (__location__ " unable to find public address %s\n",
2095 ctdb_addr_to_str(&conn->dst)));
2099 /* If the IP address is hosted on this node then
2100 * remove the connection. */
2101 if (vnn->pnn == client->ctdb->pnn) {
2102 ctdb_remove_connection(vnn, conn);
2105 /* Otherwise this function has been called because the
2106 * server IP address has been released to another node
2107 * and the client has exited. This means that we
2108 * should not delete the connection information. The
2109 * takeover node processes connections too. */
/*
 * Release the public IPs that are configured on this node.
 *
 * The DisableIPFailover tunable is checked first (presumably to return
 * early - the branch body is short and should be confirmed).  For each
 * vnn whose address is not present on the system only the interface
 * assignment is dropped.  For an address that is present and has no
 * update in flight, the CTDB_EVENT_RELEASE_IP event script is run with
 * interface, address and netmask bits, a CTDB_SRVID_RELEASE_IP message
 * carrying the address string is sent to this node, and the interface is
 * unassigned.  update_in_flight guards each release.  The number of
 * released IPs is logged at the end.
 */
2114 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2116 struct ctdb_vnn *vnn;
2120 if (ctdb->tunable.disable_ip_failover == 1) {
2124 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2125 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2126 ctdb_vnn_unassign_iface(ctdb, vnn);
2133 /* Don't allow multiple releases at once. Some code,
2134 * particularly ctdb_tickle_sentenced_connections() is
2136 if (vnn->update_in_flight) {
2137 DEBUG(DEBUG_WARNING,
2139 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2140 ctdb_addr_to_str(&vnn->public_address),
2141 vnn->public_netmask_bits,
2142 ctdb_vnn_iface_string(vnn)));
2145 vnn->update_in_flight = true;
2147 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2148 ctdb_addr_to_str(&vnn->public_address),
2149 vnn->public_netmask_bits,
2150 ctdb_vnn_iface_string(vnn)));
2152 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2153 ctdb_vnn_iface_string(vnn),
2154 ctdb_addr_to_str(&vnn->public_address),
2155 vnn->public_netmask_bits);
2157 data.dptr = (uint8_t *)talloc_strdup(
2158 vnn, ctdb_addr_to_str(&vnn->public_address));
2159 if (data.dptr != NULL) {
2160 data.dsize = strlen((char *)data.dptr) + 1;
2161 ctdb_daemon_send_message(ctdb, ctdb->pnn,
2162 CTDB_SRVID_RELEASE_IP, data);
2163 talloc_free(data.dptr);
2166 ctdb_vnn_unassign_iface(ctdb, vnn);
2167 vnn->update_in_flight = false;
2171 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2176 get list of public IPs
/*
 * Control handler: return the public IPs of this node in outdata as a
 * struct ctdb_public_ip_list_old.  The buffer is allocated as a talloc
 * child of outdata and sized for every vnn.  When
 * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE is set in c->flags, vnns for which
 * ctdb_vnn_available() is false are filtered, and outdata->dsize is
 * recomputed from the number of entries actually filled in.
 */
2178 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2179 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2182 struct ctdb_public_ip_list_old *ips;
2183 struct ctdb_vnn *vnn;
2184 bool only_available = false;
2186 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2187 only_available = true;
2190 /* count how many public ip structures we have */
2192 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2196 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2197 num*sizeof(struct ctdb_public_ip);
2198 ips = talloc_zero_size(outdata, len);
2199 CTDB_NO_MEMORY(ctdb, ips);
2202 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2203 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2206 ips->ips[i].pnn = vnn->pnn;
2207 ips->ips[i].addr = vnn->public_address;
2211 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2212 i*sizeof(struct ctdb_public_ip);
2214 outdata->dsize = len;
2215 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: describe one public IP.  indata.dptr is read as a
 * ctdb_sock_addr.  outdata receives a struct ctdb_public_ip_info_old
 * holding the address, its current pnn, and one entry (name, link state,
 * reference count) for each interface named in vnn->ifaces.  active_idx
 * is the index of the interface currently assigned to the vnn and stays
 * 0xFFFFFFFF when none matches.  An interface name that
 * ctdb_find_iface() does not know is logged as an internal error.
 *
 * NOTE(review): indata.dsize is not checked in the lines shown - confirm.
 */
2221 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2222 struct ctdb_req_control_old *c,
2227 ctdb_sock_addr *addr;
2228 struct ctdb_public_ip_info_old *info;
2229 struct ctdb_vnn *vnn;
2231 addr = (ctdb_sock_addr *)indata.dptr;
2233 vnn = find_public_ip_vnn(ctdb, addr);
2235 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2236 "'%s'not a public address\n",
2237 ctdb_addr_to_str(addr)));
2241 /* count how many interfaces are listed for this public ip */
2243 for (;vnn->ifaces[num];) {
2247 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2248 num*sizeof(struct ctdb_iface);
2249 info = talloc_zero_size(outdata, len);
2250 CTDB_NO_MEMORY(ctdb, info);
2252 info->ip.addr = vnn->public_address;
2253 info->ip.pnn = vnn->pnn;
2254 info->active_idx = 0xFFFFFFFF;
2256 for (i=0; vnn->ifaces[i]; i++) {
2257 struct ctdb_interface *cur;
2259 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2261 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2265 if (vnn->iface == cur) {
2266 info->active_idx = i;
2268 strncpy(info->ifaces[i].name, cur->name,
2269 sizeof(info->ifaces[i].name));
2270 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2271 info->ifaces[i].link_state = cur->link_up;
2272 info->ifaces[i].references = cur->references;
2275 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2276 i*sizeof(struct ctdb_iface);
2278 outdata->dsize = len;
2279 outdata->dptr = (uint8_t *)info;
/*
 * Control handler: return every interface in ctdb->ifaces (name, link
 * state, reference count) in outdata as a struct ctdb_iface_list_old.
 * The buffer is a zeroed talloc child of outdata.  Names are copied with
 * strncpy() and then explicitly NUL-terminated.
 */
2284 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2285 struct ctdb_req_control_old *c,
2289 struct ctdb_iface_list_old *ifaces;
2290 struct ctdb_interface *cur;
2292 /* count how many interfaces we have */
2294 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2298 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2299 num*sizeof(struct ctdb_iface);
2300 ifaces = talloc_zero_size(outdata, len);
2301 CTDB_NO_MEMORY(ctdb, ifaces);
2304 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2305 strncpy(ifaces->ifaces[i].name, cur->name,
2306 sizeof(ifaces->ifaces[i].name));
2307 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2308 ifaces->ifaces[i].link_state = cur->link_up;
2309 ifaces->ifaces[i].references = cur->references;
2313 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2314 i*sizeof(struct ctdb_iface);
2316 outdata->dsize = len;
2317 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a named interface.
 * indata.dptr is read as a struct ctdb_iface.  The request is validated
 * first: the name must be NUL-terminated at index CTDB_IFACE_SIZE,
 * link_state must be one of the values accepted by the switch, and
 * references must be 0.  The interface is then looked up by name; if the
 * requested state differs from the current one, the change is logged and
 * stored in iface->link_up.
 */
2322 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2323 struct ctdb_req_control_old *c,
2326 struct ctdb_iface *info;
2327 struct ctdb_interface *iface;
2328 bool link_up = false;
2330 info = (struct ctdb_iface *)indata.dptr;
2332 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2333 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2334 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2335 len, len, info->name));
2339 switch (info->link_state) {
2347 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2348 (unsigned int)info->link_state));
2352 if (info->references != 0) {
2353 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2354 (unsigned int)info->references));
2358 iface = ctdb_find_iface(ctdb, info->name);
2359 if (iface == NULL) {
2363 if (link_up == iface->link_up) {
2368 ("iface[%s] has changed it's link status %s => %s\n",
2370 iface->link_up?"up":"down",
2371 link_up?"up":"down"));
2373 iface->link_up = link_up;
2379 called by a daemon to inform us of the entire list of TCP tickles for
2380 a particular public address.
2381 this control should only be sent by the node that is currently serving
2382 that public address.
/*
 * Control handler: replace the whole tickle list of a public address with
 * the one carried in indata (struct ctdb_tickle_list_old).
 *
 * The blob size is validated in two steps: first that the fixed header is
 * present, then that it is large enough for list->num connections.  An
 * address that is not public is logged.  An update for an address hosted
 * by this node is ignored as redundant.  Otherwise the old array is freed
 * and a new one is allocated as a talloc child of the vnn and filled with
 * memcpy().
 */
2384 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2386 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2387 struct ctdb_tcp_array *tcparray;
2388 struct ctdb_vnn *vnn;
2390 /* We must at least have tickles.num or else we cant verify the size
2391 of the received data blob
2393 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2394 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2398 /* verify that the size of data matches what we expect */
2399 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2400 + sizeof(struct ctdb_connection) * list->num) {
2401 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2405 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2406 ctdb_addr_to_str(&list->addr)));
2408 vnn = find_public_ip_vnn(ctdb, &list->addr);
2410 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2411 ctdb_addr_to_str(&list->addr)));
2416 if (vnn->pnn == ctdb->pnn) {
2418 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2419 ctdb_addr_to_str(&list->addr)));
2423 /* remove any old ticklelist we might have */
2424 talloc_free(vnn->tcp_array);
2425 vnn->tcp_array = NULL;
2427 tcparray = talloc(vnn, struct ctdb_tcp_array);
2428 CTDB_NO_MEMORY(ctdb, tcparray);
2430 tcparray->num = list->num;
2432 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2433 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2435 memcpy(tcparray->connections, &list->connections[0],
2436 sizeof(struct ctdb_connection)*tcparray->num);
2438 /* We now have a new fresh tickle list array for this vnn */
2439 vnn->tcp_array = tcparray;
2445 called to return the full list of tickles for the public address associated
2446 with the provided vnn
/*
 * Control handler: return the tickles recorded for a public address.
 * indata.dptr is read as a ctdb_sock_addr.  outdata receives a
 * struct ctdb_tickle_list_old allocated as a talloc child of outdata.
 *
 * The port taken from the address is compared with the destination port
 * of each connection.  The comments below distinguish an "all
 * connections" case from a per-port count; presumably a port of 0 selects
 * all connections - confirm against the branch condition.
 */
2448 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2450 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2451 struct ctdb_tickle_list_old *list;
2452 struct ctdb_tcp_array *tcparray;
2454 struct ctdb_vnn *vnn;
2457 vnn = find_public_ip_vnn(ctdb, addr);
2459 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2460 ctdb_addr_to_str(addr)));
2465 port = ctdb_addr_to_port(addr);
2467 tcparray = vnn->tcp_array;
2469 if (tcparray != NULL) {
2471 /* All connections */
2472 num = tcparray->num;
2474 /* Count connections for port */
2475 for (i = 0; i < tcparray->num; i++) {
2476 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2483 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2484 + sizeof(struct ctdb_connection) * num;
2486 outdata->dptr = talloc_size(outdata, outdata->dsize);
2487 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2488 list = (struct ctdb_tickle_list_old *)outdata->dptr;
2498 for (i = 0; i < tcparray->num; i++) {
2500 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2501 list->connections[num] = tcparray->connections[i];
2511 set the list of all tcp tickles for a public address
2513 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2514 ctdb_sock_addr *addr,
2515 struct ctdb_tcp_array *tcparray)
2519 struct ctdb_tickle_list_old *list;
2522 num = tcparray->num;
2527 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2528 sizeof(struct ctdb_connection) * num;
2529 data.dptr = talloc_size(ctdb, data.dsize);
2530 CTDB_NO_MEMORY(ctdb, data.dptr);
2532 list = (struct ctdb_tickle_list_old *)data.dptr;
2536 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2539 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2540 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2541 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2543 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2547 talloc_free(data.dptr);
2554 perform tickle updates if required
/*
 * Timer handler for the periodic tickle update.  For every public address
 * that is hosted by this node and flagged tcp_update_needed, the tickle
 * list is sent with ctdb_send_set_tcp_tickles_for_ip().  A failure is
 * logged; after a successful send the flag is cleared.  The handler then
 * schedules itself again after tickle_update_interval seconds on
 * ctdb->tickle_update_context.
 */
2556 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2557 struct tevent_timer *te,
2558 struct timeval t, void *private_data)
2560 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2562 struct ctdb_vnn *vnn;
2564 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2565 /* we only send out updates for public addresses that
2568 if (ctdb->pnn != vnn->pnn) {
2571 /* We only send out the updates if we need to */
2572 if (!vnn->tcp_update_needed) {
2575 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2576 &vnn->public_address,
2579 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2580 ctdb_addr_to_str(&vnn->public_address)));
2583 ("Sent tickle update for public address %s\n",
2584 ctdb_addr_to_str(&vnn->public_address)));
2585 vnn->tcp_update_needed = false;
2589 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2590 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2591 ctdb_update_tcp_tickles, ctdb);
2595 start periodic update of tcp tickles
2597 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2599 ctdb->tickle_update_context = talloc_new(ctdb);
2601 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2602 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2603 ctdb_update_tcp_tickles, ctdb);
/*
 * State for one series of gratuitous ARPs driven by send_gratious_arp():
 * the daemon context and the address that is announced.
 */
2609 struct control_gratious_arp {
2610 struct ctdb_context *ctdb;
2611 ctdb_sock_addr addr;
2617 send a control_gratuitous arp
/*
 * Timer handler for a gratuitous ARP series.  private_data is the
 * struct control_gratious_arp state.  Each run calls ctdb_sys_send_arp()
 * for arp->addr on arp->iface and logs a failure with strerror(errno).
 * arp->count is compared with CTDB_ARP_REPEAT to decide when the series
 * ends; otherwise the handler schedules itself again after
 * CTDB_ARP_INTERVAL seconds, with the state as talloc parent of the timer.
 */
2619 static void send_gratious_arp(struct tevent_context *ev,
2620 struct tevent_timer *te,
2621 struct timeval t, void *private_data)
2624 struct control_gratious_arp *arp = talloc_get_type(private_data,
2625 struct control_gratious_arp);
2627 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2629 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2630 arp->iface, strerror(errno)));
2635 if (arp->count == CTDB_ARP_REPEAT) {
2640 tevent_add_timer(arp->ctdb->ev, arp,
2641 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2642 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs.  indata.dptr is
 * read as a struct ctdb_addr_info_old.  Its size is checked against the
 * fixed header and then against header plus the embedded interface-name
 * length.  A control_gratious_arp state is allocated on ctdb, receives
 * copies of the address and the interface name, and a zero-delay timer
 * starts send_gratious_arp().
 *
 * NOTE(review): when talloc_strdup() fails, CTDB_NO_MEMORY presumably
 * returns without freeing arp, which would stay attached to ctdb -
 * confirm.
 */
2649 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2651 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2652 struct control_gratious_arp *arp;
2654 /* verify the size of indata */
2655 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2656 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2657 (unsigned)indata.dsize,
2658 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2662 ( offsetof(struct ctdb_addr_info_old, iface)
2663 + gratious_arp->len ) ){
2665 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2666 "but should be %u bytes\n",
2667 (unsigned)indata.dsize,
2668 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2673 arp = talloc(ctdb, struct control_gratious_arp);
2674 CTDB_NO_MEMORY(ctdb, arp);
2677 arp->addr = gratious_arp->addr;
2678 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2679 CTDB_NO_MEMORY(ctdb, arp->iface);
2682 tevent_add_timer(arp->ctdb->ev, arp,
2683 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address at runtime.  indata.dptr is read
 * as a struct ctdb_addr_info_old and size-checked against the fixed
 * header and the embedded interface-name length.  The address, mask and
 * interface string are then passed to ctdb_add_public_address(), and a
 * failure there is logged.
 */
2688 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2690 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2693 /* verify the size of indata */
2694 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2695 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2699 ( offsetof(struct ctdb_addr_info_old, iface)
2702 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2703 "but should be %u bytes\n",
2704 (unsigned)indata.dsize,
2705 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2709 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2711 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2714 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * Control handler: delete a public address.  indata.dptr is read as a
 * struct ctdb_addr_info_old and size-checked.  The vnn list is searched
 * for the address.  If it is hosted by this node it is only marked
 * delete_pending, which defers the deletion to the next takeover run;
 * otherwise it is deleted immediately with do_delete_ip().  An address
 * that is not found is logged as unknown.
 */
2721 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2723 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2724 struct ctdb_vnn *vnn;
2726 /* verify the size of indata */
2727 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2728 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2732 ( offsetof(struct ctdb_addr_info_old, iface)
2735 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2736 "but should be %u bytes\n",
2737 (unsigned)indata.dsize,
2738 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2742 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2744 /* walk over all public addresses until we find a match */
2745 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2746 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2747 if (vnn->pnn == ctdb->pnn) {
2748 /* This IP is currently being hosted.
2749 * Defer the deletion until the next
2750 * takeover run. "ctdb reloadips" will
2751 * always cause a takeover run. "ctdb
2752 * delip" will now need an explicit
2753 * "ctdb ipreallocated" afterwards. */
2754 vnn->delete_pending = true;
2756 /* This IP is not hosted on the
2757 * current node so just delete it
2759 do_delete_ip(ctdb, vnn);
2766 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2767 ctdb_addr_to_str(&pub->addr)));
/* State handed from ctdb_control_ipreallocated() to its event-script
 * callback: the control request that is answered once the
 * "ipreallocated" event has completed. */
2772 struct ipreallocated_callback_state {
2773 struct ctdb_req_control_old *c;
/*
 * Completion callback for the "ipreallocated" event script.
 *
 * p is the struct ipreallocated_callback_state registered by
 * ctdb_control_ipreallocated().  The failure message is logged with
 * the script status, a status of -ETIME additionally triggers
 * ctdb_ban_self(), and the status is sent back as the reply to the
 * stored control request.
 */
2776 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2777 int status, void *p)
2779 struct ipreallocated_callback_state *state =
2780 talloc_get_type(p, struct ipreallocated_callback_state);
2784 (" \"ipreallocated\" event script failed (status %d)\n",
2786 if (status == -ETIME) {
2787 ctdb_ban_self(ctdb);
/* Answer the control that requested the event, passing on the status */
2791 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2795 /* A control to run the ipreallocated event */
/*
 * The event script is started with ctdb_ipreallocated_callback as its
 * completion callback, so the reply to request c is sent from that
 * callback and *async_reply is set to true here.
 */
2796 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2797 struct ctdb_req_control_old *c,
2801 struct ipreallocated_callback_state *state;
/* Callback state is allocated as a talloc child of ctdb.
 * NOTE(review): CTDB_NO_MEMORY is defined elsewhere; presumably it
 * bails out when the allocation returned NULL - confirm. */
2803 state = talloc(ctdb, struct ipreallocated_callback_state);
2804 CTDB_NO_MEMORY(ctdb, state);
2806 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
/* Run the CTDB_EVENT_IPREALLOCATED event script; state is handed to
 * ctdb_ipreallocated_callback as its private pointer */
2808 ret = ctdb_event_script_callback(ctdb, state,
2809 ctdb_ipreallocated_callback, state,
2810 CTDB_EVENT_IPREALLOCATED,
2814 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2819 /* tell the control that we will reply asynchronously */
/* Reparent the request under state so it remains available to the
 * callback that sends the reply */
2820 state->c = talloc_steal(state, c);
2821 *async_reply = true;
/* Bookkeeping for one in-flight "reload public IPs" control: the
 * daemon context, the control request that is answered when the
 * handle is destroyed, and the tevent fd handler watching the pipe
 * from the child process. */
2827 struct ctdb_reloadips_handle {
2828 struct ctdb_context *ctdb;
2829 struct ctdb_req_control_old *c;
2833 struct tevent_fd *fde;
/*
 * talloc destructor for a reloadips handle (installed by
 * ctdb_control_reload_public_ips()).
 *
 * Clears ctdb->reload_ips if it still refers to this handle, replies
 * to the pending control with the status stored in the handle, and
 * calls ctdb_kill() on the child pid with SIGKILL.
 */
2836 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2838 if (h == h->ctdb->reload_ips) {
2839 h->ctdb->reload_ips = NULL;
2842 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2845 ctdb_kill(h->ctdb, h->child, SIGKILL);
/*
 * Timer callback armed by ctdb_control_reload_public_ips() with a
 * 120 second timeout.  private_data is the reloadips handle.
 */
2849 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2850 struct tevent_timer *te,
2851 struct timeval t, void *private_data)
2853 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
/*
 * fd event handler for the read end of the reloadips pipe;
 * private_data is the reloadips handle.
 *
 * Reads the single result byte written by the child process.  A read
 * that returns fewer than one byte, or a non-zero result byte, is
 * logged as a child failure.
 */
2858 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2859 struct tevent_fd *fde,
2860 uint16_t flags, void *private_data)
2862 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2867 ret = sys_read(h->fd[0], &res, 1);
2868 if (ret < 1 || res != 0) {
2869 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
/*
 * Work done inside the reloadips child process (called from
 * ctdb_control_reload_public_ips() after the fork).
 *
 * Fetches the public IPs currently known to the local node, re-reads
 * the public addresses file, and then compares the two sets:
 *   - for every IP the node reports that is no longer in ctdb->vnn a
 *     CTDB_CONTROL_DEL_PUBLIC_IP control is sent to the local node;
 *   - for every entry in ctdb->vnn that the node does not report a
 *     CTDB_CONTROL_ADD_PUBLIC_IP control is sent to the local node.
 * The controls are queued on async_data and waited for together at
 * the end.
 */
2877 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
/* Scratch talloc context for this function; it is freed on the exit
 * paths visible below */
2879 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2880 struct ctdb_public_ip_list_old *ips;
2881 struct ctdb_vnn *vnn;
2882 struct client_async_data *async_data;
2883 struct timeval timeout;
2885 struct ctdb_client_control_state *state;
2889 CTDB_NO_MEMORY(ctdb, mem_ctx);
2891 /* Read IPs from local node */
2892 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2893 CTDB_CURRENT_NODE, mem_ctx, &ips);
2896 ("Unable to fetch public IPs from local node\n"));
2897 talloc_free(mem_ctx);
2901 /* Read IPs file - this is safe since this is a child process */
2903 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2904 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2905 talloc_free(mem_ctx);
/* Collects the state of every control sent below so they can be
 * waited for in one go */
2909 async_data = talloc_zero(mem_ctx, struct client_async_data);
2910 CTDB_NO_MEMORY(ctdb, async_data);
2912 /* Compare IPs between node and file for IPs to be deleted */
2913 for (i = 0; i < ips->num; i++) {
2915 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2916 if (ctdb_same_ip(&vnn->public_address,
2917 &ips->ips[i].addr)) {
2918 /* IP is still in file */
2924 /* Delete IP ips->ips[i] */
2925 struct ctdb_addr_info_old *pub;
2928 ("IP %s no longer configured, deleting it\n",
2929 ctdb_addr_to_str(&ips->ips[i].addr)));
/* Zero-initialised request; only the address is filled in here */
2931 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2932 CTDB_NO_MEMORY(ctdb, pub);
2934 pub->addr = ips->ips[i].addr;
2938 timeout = TAKEOVER_TIMEOUT();
2940 data.dsize = offsetof(struct ctdb_addr_info_old,
2942 data.dptr = (uint8_t *)pub;
2944 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2945 CTDB_CONTROL_DEL_PUBLIC_IP,
2946 0, data, async_data,
2948 if (state == NULL) {
2951 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2955 ctdb_client_async_add(async_data, state);
2959 /* Compare IPs between node and file for IPs to be added */
2961 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2962 for (i = 0; i < ips->num; i++) {
2963 if (ctdb_same_ip(&vnn->public_address,
2964 &ips->ips[i].addr)) {
2965 /* IP already on node */
/* i reaching ips->num means the inner loop found no match, i.e. the
 * node does not know this address yet */
2969 if (i == ips->num) {
2970 /* Add IP ips->ips[i] */
2971 struct ctdb_addr_info_old *pub;
2972 const char *ifaces = NULL;
2977 ("New IP %s configured, adding it\n",
2978 ctdb_addr_to_str(&vnn->public_address)));
/* Broadcast this node's PNN to all connected nodes with
 * CTDB_SRVID_REBALANCE_NODE; a send failure is only a warning */
2980 uint32_t pnn = ctdb_get_pnn(ctdb);
2982 data.dsize = sizeof(pnn);
2983 data.dptr = (uint8_t *)&pnn;
2985 ret = ctdb_client_send_message(
2987 CTDB_BROADCAST_CONNECTED,
2988 CTDB_SRVID_REBALANCE_NODE,
2991 DEBUG(DEBUG_WARNING,
2992 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
/* Build a comma-separated list of the interface names configured
 * for this address.
 * NOTE(review): each talloc_asprintf() result is parented to vnn
 * rather than mem_ctx, and is passed to strlen() below with no
 * visible NULL check - confirm this is acceptable in the child. */
2998 ifaces = vnn->ifaces[0];
3000 while (vnn->ifaces[iface] != NULL) {
3001 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3002 vnn->ifaces[iface]);
/* The request is the fixed part of ctdb_addr_info_old followed by
 * len bytes of interface list, terminating NUL included */
3006 len = strlen(ifaces) + 1;
3007 pub = talloc_zero_size(mem_ctx,
3008 offsetof(struct ctdb_addr_info_old, iface) + len);
3009 CTDB_NO_MEMORY(ctdb, pub);
3011 pub->addr = vnn->public_address;
3012 pub->mask = vnn->public_netmask_bits;
3014 memcpy(&pub->iface[0], ifaces, pub->len);
3016 timeout = TAKEOVER_TIMEOUT();
3018 data.dsize = offsetof(struct ctdb_addr_info_old,
3020 data.dptr = (uint8_t *)pub;
3022 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3023 CTDB_CONTROL_ADD_PUBLIC_IP,
3024 0, data, async_data,
3026 if (state == NULL) {
3029 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3033 ctdb_client_async_add(async_data, state);
/* Wait for every queued add/delete control to finish */
3037 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3038 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3042 talloc_free(mem_ctx);
3046 talloc_free(mem_ctx);
3050 /* This control is sent to force the node to re-read the public addresses file
3051 and drop any addresses we should no longer host, and add new addresses
3052 that we are now able to host
3054 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3056 struct ctdb_reloadips_handle *h;
/* Remembered before forking so the child can refer to the parent pid */
3057 pid_t parent = getpid();
/* Drop any handle left over from an earlier reload request before
 * starting a new one */
3059 if (ctdb->reload_ips != NULL) {
3060 talloc_free(ctdb->reload_ips);
3061 ctdb->reload_ips = NULL;
3064 h = talloc(ctdb, struct ctdb_reloadips_handle);
3065 CTDB_NO_MEMORY(ctdb, h);
/* Pipe over which the child reports its one-byte result.
 * NOTE(review): the log text below names ctdb_freeze_lock although
 * this is the reloadips control - looks copied from elsewhere;
 * confirm before changing the message. */
3070 if (pipe(h->fd) == -1) {
3071 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3076 h->child = ctdb_fork(ctdb);
3077 if (h->child == (pid_t)-1) {
3078 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
/* Child process: tag its log output and process name, switch to
 * client mode, run ctdb_reloadips_child(), write the result byte to
 * the pipe and then call ctdb_wait_for_process_to_exit() on the
 * parent pid.
 * NOTE(review): the return value of sys_write() is not used. */
3086 if (h->child == 0) {
3087 signed char res = 0;
3090 debug_extra = talloc_asprintf(NULL, "reloadips:");
3092 prctl_set_comment("ctdb_reloadips");
3093 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3094 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3097 res = ctdb_reloadips_child(ctdb);
3099 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3103 sys_write(h->fd[1], &res, 1);
3104 ctdb_wait_for_process_to_exit(parent);
/* Parent process: reparent the request under h so it is available
 * when the reply is sent */
3108 h->c = talloc_steal(h, c);
3111 set_close_on_exec(h->fd[0]);
/* From here on, freeing h runs ctdb_reloadips_destructor(), which
 * replies to the control and kills the child */
3113 talloc_set_destructor(h, ctdb_reloadips_destructor);
/* Watch the read end of the pipe for the child's result; auto-close
 * ties the descriptor's lifetime to the fd event */
3116 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3117 ctdb_reloadips_child_handler, (void *)h);
3118 tevent_fd_set_auto_close(h->fde);
/* Arm a 120 second timer that fires ctdb_reloadips_timeout_event */
3120 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3121 ctdb_reloadips_timeout_event, h);
3123 /* we reply later */
3124 *async_reply = true;