4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/* Timeout for takeover related controls, built from the takeover_timeout
 * tunable.  Expands to an expression that needs a `ctdb` variable in scope. */
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument of timeval_current_ofs() when ctdb_control_send_arp()
 * re-arms its timer. */
48 #define CTDB_ARP_INTERVAL 1
/* Value that ctdb_control_send_arp() compares arp->count against. */
49 #define CTDB_ARP_REPEAT 3
/* One network interface known to the daemon; entries are chained on
 * ctdb->ifaces through prev/next.
 * NOTE(review): the remaining members (name, link_up and references are
 * used elsewhere in this file) are not visible in this listing. */
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
/* Return the name of the interface currently assigned to the VNN; used
 * for log messages and event script arguments in this file.
 * NOTE(review): only the vnn->iface->name return is visible here; whether
 * a NULL vnn->iface is handled on the missing lines cannot be confirmed. */
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
/*
 * Make sure an interface name is present in the ctdb->ifaces list.
 *
 * The list is scanned for an entry with the same name; after that a new
 * zeroed ctdb_interface is allocated on ctdb, given its own copy of the
 * name and added to the list.
 * NOTE(review): what happens when a duplicate is found, and the return
 * statements, are on lines missing from this listing.
 */
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 /* Verify that we don't have an entry for this interface yet */
72 for (i=ctdb->ifaces;i;i=i->next) {
73 if (strcmp(i->name, iface) == 0) {
78 /* create a new structure for this interface */
79 i = talloc_zero(ctdb, struct ctdb_interface);
80 CTDB_NO_MEMORY_FATAL(ctdb, i);
81 i->name = talloc_strdup(i, iface);
82 CTDB_NO_MEMORY(ctdb, i->name);
86 DLIST_ADD(ctdb->ifaces, i);
/* Walk the NULL-terminated vnn->ifaces array and compare each entry with
 * the given name using strcmp().
 * NOTE(review): the second parameter and the return statements are not
 * visible; presumably true is returned on a match - confirm. */
91 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
96 for (n = 0; vnn->ifaces[n] != NULL; n++) {
97 if (strcmp(name, vnn->ifaces[n]) == 0) {
105 /* If any interfaces now have no possible IPs then delete them. This
106 * implementation is naive (i.e. simple) rather than clever
107 * (i.e. complex). Given that this is run on delip and that operation
108 * is rare, this doesn't need to be efficient - it needs to be
109 * foolproof. One alternative is reference counting, where the logic
110 * is distributed and can, therefore, be broken in multiple places.
111 * Another alternative is to build a red-black tree of interfaces that
112 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
113 * once) and then walking ctdb->ifaces once and deleting those not in
114 * the tree. Let's go to one of those if the naive implementation
115 * causes problems... :-)
/*
 * Remove entries from ctdb->ifaces that are no longer used by any address.
 *
 * Only interfaces named in the given vnn are examined.  An interface is
 * looked for as the first interface of ctdb->single_ip_vnn and in every
 * VNN still on ctdb->vnn; the DLIST_REMOVE below is commented as the case
 * where no VNN uses it.
 * NOTE(review): the loop advances through a separate `next` pointer; the
 * line that loads it, the continue/break statements and any freeing of
 * the removed entry are missing from this listing.
 */
117 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
118 struct ctdb_vnn *vnn)
120 struct ctdb_interface *i, *next;
122 /* For each interface, check if there's an IP using it. */
123 for (i = ctdb->ifaces; i != NULL; i = next) {
128 /* Only consider interfaces named in the given VNN. */
129 if (!vnn_has_interface_with_name(vnn, i->name)) {
133 /* Is the "single IP" on this interface? */
134 if ((ctdb->single_ip_vnn != NULL) &&
135 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
136 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
137 /* Found, next interface please... */
140 /* Search for a vnn with this interface. */
142 for (tv=ctdb->vnn; tv; tv=tv->next) {
143 if (vnn_has_interface_with_name(tv, i->name)) {
150 /* None of the VNNs are using this interface. */
151 DLIST_REMOVE(ctdb->ifaces, i);
/* Look up an entry of ctdb->ifaces by name (exact strcmp() match).
 * NOTE(review): the name parameter line and the return statements are
 * missing from this listing; callers in this file store the result in a
 * pointer, so presumably NULL means "not found" - confirm. */
158 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
161 struct ctdb_interface *i;
163 for (i=ctdb->ifaces;i;i=i->next) {
164 if (strcmp(i->name, iface) == 0) {
/*
 * Pick an interface for the VNN from the names listed in vnn->ifaces.
 *
 * Each name is resolved with ctdb_find_iface(); the one comparison that
 * is visible checks whether the candidate has fewer references than the
 * current best.
 * NOTE(review): any other selection criteria, the handling of failed
 * lookups and the return statement are on lines missing from this
 * listing.  Callers in this file treat a NULL result as "no usable
 * interface".
 */
172 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
176 struct ctdb_interface *cur = NULL;
177 struct ctdb_interface *best = NULL;
179 for (i=0; vnn->ifaces[i]; i++) {
181 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
195 if (cur->references < best->references) {
/*
 * Bind the VNN to an interface chosen by ctdb_vnn_best_iface() and record
 * this node as the holder of the address (vnn->pnn = ctdb->pnn).
 *
 * Logs at INFO when an interface is still assigned, at ERR when no
 * interface can be chosen, and at INFO once the assignment is made.
 * NOTE(review): the conditions guarding each log call, the update of
 * vnn->iface and its reference count, and the return values are on lines
 * missing from this listing.  The ERR message text reads "cannot assign
 * to iface any iface".
 */
204 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
205 struct ctdb_vnn *vnn)
207 struct ctdb_interface *best = NULL;
210 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
211 "still assigned to iface '%s'\n",
212 ctdb_addr_to_str(&vnn->public_address),
213 ctdb_vnn_iface_string(vnn)));
217 best = ctdb_vnn_best_iface(ctdb, vnn);
219 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
220 "cannot assign to iface any iface\n",
221 ctdb_addr_to_str(&vnn->public_address)));
227 vnn->pnn = ctdb->pnn;
229 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
230 "now assigned to iface '%s' refs[%d]\n",
231 ctdb_addr_to_str(&vnn->public_address),
232 ctdb_vnn_iface_string(vnn),
/*
 * Detach the VNN from its interface: logs the old interface with its
 * reference count and decrements vnn->iface->references.
 *
 * NOTE(review): the log call guards vnn->iface against NULL while the
 * visible decrement does not; the guarding condition, if any, is on a
 * line missing from this listing.  What is done when
 * vnn->pnn == ctdb->pnn is likewise not visible.
 */
237 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
238 struct ctdb_vnn *vnn)
240 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241 "now unassigned (old iface '%s' refs[%d])\n",
242 ctdb_addr_to_str(&vnn->public_address),
243 ctdb_vnn_iface_string(vnn),
244 vnn->iface?vnn->iface->references:0));
246 vnn->iface->references--;
249 if (vnn->pnn == ctdb->pnn) {
/*
 * Decide whether this node can currently host the VNN.
 *
 * Checks visible here, in order: the daemon runstate must be RUNNING,
 * the VNN's delete_pending flag, whether the currently assigned
 * interface has link_up set, and then each interface named in
 * vnn->ifaces as resolved by ctdb_find_iface().
 * NOTE(review): the result returned for each check is on lines missing
 * from this listing.
 */
254 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
255 struct ctdb_vnn *vnn)
259 /* Nodes that are not RUNNING can not host IPs */
260 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
264 if (vnn->delete_pending) {
268 if (vnn->iface && vnn->iface->link_up) {
272 for (i=0; vnn->ifaces[i]; i++) {
273 struct ctdb_interface *cur;
275 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* State for the repeated announcement done by ctdb_control_send_arp().
 * NOTE(review): that function also reads arp->addr and arp->count; those
 * members are not visible in this listing. */
288 struct ctdb_takeover_arp {
289 struct ctdb_context *ctdb;
292 struct ctdb_tcp_array *tcparray;
293 struct ctdb_vnn *vnn;
298 lists of tcp endpoints
/* Doubly linked list node holding one ctdb_connection. */
300 struct ctdb_tcp_list {
301 struct ctdb_tcp_list *prev, *next;
302 struct ctdb_connection connection;
306 list of clients to kill on IP release
/* Doubly linked list node; release_kill_clients() walks these from
 * ctdb->client_ip_list.
 * NOTE(review): further members (an addr field is read by
 * release_kill_clients()) are not visible in this listing. */
308 struct ctdb_client_ip {
309 struct ctdb_client_ip *prev, *next;
310 struct ctdb_context *ctdb;
317 send a gratuitous arp
/*
 * Timer handler that announces a taken-over address.
 *
 * private_data is a struct ctdb_takeover_arp.  The handler sends an ARP
 * for arp->addr on the VNN's interface via ctdb_sys_send_arp(), then
 * walks arp->tcparray and sends a tcp tickle ack for each connection via
 * ctdb_sys_send_tcp().  Failures of either call are logged at CRIT.
 * Finally arp->count is compared with CTDB_ARP_REPEAT and the handler is
 * scheduled again on the VNN's takeover_ctx.
 * NOTE(review): the increment of arp->count, the branch taken when the
 * limit is reached and any NULL check on tcparray are on lines missing
 * from this listing.
 */
319 static void ctdb_control_send_arp(struct tevent_context *ev,
320 struct tevent_timer *te,
321 struct timeval t, void *private_data)
323 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
324 struct ctdb_takeover_arp);
326 struct ctdb_tcp_array *tcparray;
327 const char *iface = ctdb_vnn_iface_string(arp->vnn);
329 ret = ctdb_sys_send_arp(&arp->addr, iface);
331 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
332 iface, strerror(errno)));
335 tcparray = arp->tcparray;
337 for (i=0;i<tcparray->num;i++) {
338 struct ctdb_connection *tcon;
340 tcon = &tcparray->connections[i];
341 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
342 (unsigned)ntohs(tcon->dst.ip.sin_port),
343 ctdb_addr_to_str(&tcon->src),
344 (unsigned)ntohs(tcon->src.ip.sin_port)));
345 ret = ctdb_sys_send_tcp(
350 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
351 ctdb_addr_to_str(&tcon->src)));
358 if (arp->count == CTDB_ARP_REPEAT) {
363 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
364 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
365 ctdb_control_send_arp, arp);
/*
 * Start announcing the VNN's public address.
 *
 * Creates vnn->takeover_ctx when it does not exist, allocates a
 * ctdb_takeover_arp on it, moves vnn->tcp_array under that structure
 * (talloc_steal), clears vnn->tcp_array and sets tcp_update_needed, then
 * schedules ctdb_control_send_arp() with a zero timeout.
 * NOTE(review): the error returns and the initialisation of arp->ctdb
 * and arp->vnn are on lines missing from this listing.
 */
368 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
369 struct ctdb_vnn *vnn)
371 struct ctdb_takeover_arp *arp;
372 struct ctdb_tcp_array *tcparray;
374 if (!vnn->takeover_ctx) {
375 vnn->takeover_ctx = talloc_new(vnn);
376 if (!vnn->takeover_ctx) {
381 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
387 arp->addr = vnn->public_address;
390 tcparray = vnn->tcp_array;
392 /* add all of the known tcp connections for this IP to the
393 list of tcp connections to send tickle acks for */
394 arp->tcparray = talloc_steal(arp, tcparray);
396 vnn->tcp_array = NULL;
397 vnn->tcp_update_needed = true;
400 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
401 timeval_zero(), ctdb_control_send_arp, arp);
/* State handed to release_ip_callback(): the control request to answer,
 * the address being released and the VNN it belongs to. */
406 struct takeover_callback_state {
407 struct ctdb_req_control_old *c;
408 ctdb_sock_addr *addr;
409 struct ctdb_vnn *vnn;
/* State handed to ctdb_do_takeip_callback(): the control request to
 * answer and the VNN being taken over. */
412 struct ctdb_do_takeip_state {
413 struct ctdb_req_control_old *c;
414 struct ctdb_vnn *vnn;
418 called when takeip event finishes
/*
 * Completion handler for the event script started by ctdb_do_takeip().
 *
 * private_data is a struct ctdb_do_takeip_state.  Visible behaviour:
 *  - failure path: logs at ERR, replies to the control with `status` and
 *    sets NODE_FLAGS_UNHEALTHY on this node's entry in ctdb->nodes;
 *  - when ctdb->do_checkpublicip is set, the address is announced with
 *    ctdb_announce_vnn_iface(), replying -1 if that fails;
 *  - a CTDB_SRVID_TAKE_IP message carrying the address string is sent to
 *    this node;
 *  - the control is answered with 0.
 * NOTE(review): the conditions selecting the failure path (only the
 * status == -ETIME test is visible), the returns and the freeing of
 * state are on lines missing from this listing.
 */
420 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
423 struct ctdb_do_takeip_state *state =
424 talloc_get_type(private_data, struct ctdb_do_takeip_state);
429 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
431 if (status == -ETIME) {
434 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
435 ctdb_addr_to_str(&state->vnn->public_address),
436 ctdb_vnn_iface_string(state->vnn)));
437 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
439 node->flags |= NODE_FLAGS_UNHEALTHY;
444 if (ctdb->do_checkpublicip) {
446 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
448 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
455 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
456 data.dsize = strlen((char *)data.dptr) + 1;
457 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
459 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
462 /* the control succeeded */
463 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_takeip(): clears the VNN's
 * update_in_flight flag when the state object is freed. */
468 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
470 state->vnn->update_in_flight = false;
475 take over an ip address
/*
 * Begin taking over a public address on this node.
 *
 * Steps visible here:
 *  - refuse (NOTICE log) when an update for this VNN is already in flight;
 *  - assign an interface with ctdb_vnn_assign_iface(), logging at ERR if
 *    that fails;
 *  - allocate the callback state on the VNN, reparent the control request
 *    onto ctdb, set update_in_flight and install the destructor that
 *    clears it again;
 *  - run an event script with ctdb_do_takeip_callback as completion
 *    handler, passing the interface name, the address and the netmask
 *    bits; a failure to start it is logged at ERR.
 * NOTE(review): the event type, the return values and the remaining
 * state initialisation are on lines missing from this listing.
 */
477 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
478 struct ctdb_req_control_old *c,
479 struct ctdb_vnn *vnn)
482 struct ctdb_do_takeip_state *state;
484 if (vnn->update_in_flight) {
485 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
486 "update for this IP already in flight\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits));
492 ret = ctdb_vnn_assign_iface(ctdb, vnn);
494 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
495 "assign a usable interface\n",
496 ctdb_addr_to_str(&vnn->public_address),
497 vnn->public_netmask_bits));
501 state = talloc(vnn, struct ctdb_do_takeip_state);
502 CTDB_NO_MEMORY(ctdb, state);
504 state->c = talloc_steal(ctdb, c);
507 vnn->update_in_flight = true;
508 talloc_set_destructor(state, ctdb_takeip_destructor);
510 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
511 ctdb_addr_to_str(&vnn->public_address),
512 vnn->public_netmask_bits,
513 ctdb_vnn_iface_string(vnn)));
515 ret = ctdb_event_script_callback(ctdb,
517 ctdb_do_takeip_callback,
521 ctdb_vnn_iface_string(vnn),
522 ctdb_addr_to_str(&vnn->public_address),
523 vnn->public_netmask_bits);
526 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
527 ctdb_addr_to_str(&vnn->public_address),
528 ctdb_vnn_iface_string(vnn)));
/* State handed to ctdb_do_updateip_callback(): the control request to
 * answer, the interface the address is moving away from, and the VNN. */
536 struct ctdb_do_updateip_state {
537 struct ctdb_req_control_old *c;
538 struct ctdb_interface *old;
539 struct ctdb_vnn *vnn;
543 called when updateip event finishes
/*
 * Completion handler for the CTDB_EVENT_UPDATE_IP script started by
 * ctdb_do_updateip().
 *
 * private_data is a struct ctdb_do_updateip_state.  Visible behaviour:
 *  - failure path: logs at ERR, unassigns the new interface, points the
 *    VNN back at the old interface (taking a reference on it) and
 *    replies to the control with `status`;
 *  - when ctdb->do_checkpublicip is set, the address is announced with
 *    ctdb_announce_vnn_iface(), replying -1 if that fails;
 *  - the control is answered with 0.
 * NOTE(review): the conditions selecting the failure path (only the
 * status == -ETIME test is visible), the returns and the freeing of
 * state are on lines missing from this listing.
 */
545 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
548 struct ctdb_do_updateip_state *state =
549 talloc_get_type(private_data, struct ctdb_do_updateip_state);
553 if (status == -ETIME) {
556 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
557 ctdb_addr_to_str(&state->vnn->public_address),
559 ctdb_vnn_iface_string(state->vnn)));
562 * All we can do is reset the old interface
563 * and let the next run fix it
565 ctdb_vnn_unassign_iface(ctdb, state->vnn);
566 state->vnn->iface = state->old;
567 state->vnn->iface->references++;
569 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
574 if (ctdb->do_checkpublicip) {
576 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
578 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
585 /* the control succeeded */
586 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_do_updateip(): clears the VNN's
 * update_in_flight flag when the state object is freed. */
591 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
593 state->vnn->update_in_flight = false;
598 update (move) an ip address
/*
 * Move a public address this node already holds to another interface.
 *
 * Steps visible here:
 *  - refuse (NOTICE log) when an update for this VNN is already in flight;
 *  - remember the current interface in `old`, unassign it and assign a
 *    new one with ctdb_vnn_assign_iface(), logging at ERR on failure;
 *  - when old and new interface names are equal, answer the control with
 *    0 without running an event script;
 *  - otherwise allocate the callback state on the VNN, reparent the
 *    control request onto ctdb, set update_in_flight, install the
 *    destructor and run CTDB_EVENT_UPDATE_IP with
 *    ctdb_do_updateip_callback as completion handler; a failure to start
 *    it is logged at ERR.
 * NOTE(review): return values and the remaining state initialisation are
 * on lines missing from this listing.  The ERR message for a failed
 * assignment spells "assign" as "assin".
 */
600 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
601 struct ctdb_req_control_old *c,
602 struct ctdb_vnn *vnn)
605 struct ctdb_do_updateip_state *state;
606 struct ctdb_interface *old = vnn->iface;
607 const char *new_name;
609 if (vnn->update_in_flight) {
610 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
611 "update for this IP already in flight\n",
612 ctdb_addr_to_str(&vnn->public_address),
613 vnn->public_netmask_bits));
617 ctdb_vnn_unassign_iface(ctdb, vnn);
618 ret = ctdb_vnn_assign_iface(ctdb, vnn);
620 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
621 "assin a usable interface (old iface '%s')\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 vnn->public_netmask_bits,
628 new_name = ctdb_vnn_iface_string(vnn);
629 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
630 /* A benign update from one interface onto itself.
631 * no need to run the eventscripts in this case, just return
634 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
638 state = talloc(vnn, struct ctdb_do_updateip_state);
639 CTDB_NO_MEMORY(ctdb, state);
641 state->c = talloc_steal(ctdb, c);
645 vnn->update_in_flight = true;
646 talloc_set_destructor(state, ctdb_updateip_destructor);
648 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
649 "interface %s to %s\n",
650 ctdb_addr_to_str(&vnn->public_address),
651 vnn->public_netmask_bits,
655 ret = ctdb_event_script_callback(ctdb,
657 ctdb_do_updateip_callback,
659 CTDB_EVENT_UPDATE_IP,
663 ctdb_addr_to_str(&vnn->public_address),
664 vnn->public_netmask_bits);
666 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
667 ctdb_addr_to_str(&vnn->public_address),
668 old->name, new_name));
677 Find the vnn entry for a public ip address
678 returns NULL if the address is not known as a public address
/* Search ctdb->vnn for the entry whose public_address matches addr
 * according to ctdb_same_ip().
 * NOTE(review): the return statements are missing from this listing;
 * the function returns a pointer, so presumably NULL means "not a public
 * address" - confirm. */
680 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
682 struct ctdb_vnn *vnn;
684 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
685 if (ctdb_same_ip(&vnn->public_address, addr)) {
694 take over an ip address
/*
 * Control handler: take over the public address described by indata
 * (a struct ctdb_public_ip).
 *
 * Flow visible here:
 *  - log at ERR when the request names a pnn other than this node;
 *  - look the address up with find_public_ip_vnn(); log at INFO when it
 *    is not a public address;
 *  - when IP failover is enabled and do_checkpublicip is set, ask the
 *    kernel whether the address is already configured (have_ip);
 *  - choose the preferred interface with ctdb_vnn_best_iface(); log at
 *    ERR when there is none;
 *  - log several combinations of vnn->iface, vnn->pnn and have_ip that
 *    indicate local state and kernel state disagree;
 *  - when the assigned interface differs from the preferred one, test
 *    whether its link is down or whether it carries more than one
 *    reference above the preferred interface;
 *  - dispatch to ctdb_do_takeip() or ctdb_do_updateip(), or log a
 *    redundant takeover.
 * NOTE(review): the remaining parameters, the statements that set
 * do_takeip/do_updateip, the returns and the async-reply assignment are
 * on lines missing from this listing.
 * NOTE(review): two log messages are built from adjacent string literals
 * with no separating space, so they print as "failed to finda usable
 * interface" and "node %dand we are node %d".
 */
696 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
697 struct ctdb_req_control_old *c,
702 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
703 struct ctdb_vnn *vnn;
704 bool have_ip = false;
705 bool do_updateip = false;
706 bool do_takeip = false;
707 struct ctdb_interface *best_iface = NULL;
709 if (pip->pnn != ctdb->pnn) {
710 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
711 "with pnn %d, but we're node %d\n",
712 ctdb_addr_to_str(&pip->addr),
713 pip->pnn, ctdb->pnn));
717 /* look up the address in our vnn list */
718 vnn = find_public_ip_vnn(ctdb, &pip->addr);
720 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
721 ctdb_addr_to_str(&pip->addr)));
725 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
726 have_ip = ctdb_sys_have_ip(&pip->addr);
728 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
729 if (best_iface == NULL) {
730 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
731 "a usable interface (old %s, have_ip %d)\n",
732 ctdb_addr_to_str(&vnn->public_address),
733 vnn->public_netmask_bits,
734 ctdb_vnn_iface_string(vnn),
739 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
740 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
745 if (vnn->iface == NULL && have_ip) {
746 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
747 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
748 ctdb_addr_to_str(&vnn->public_address)));
752 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
753 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
754 "and we have it on iface[%s], but it was assigned to node %d"
755 "and we are node %d, banning ourself\n",
756 ctdb_addr_to_str(&vnn->public_address),
757 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
762 if (vnn->pnn == -1 && have_ip) {
763 vnn->pnn = ctdb->pnn;
764 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
765 "and we already have it on iface[%s], update local daemon\n",
766 ctdb_addr_to_str(&vnn->public_address),
767 ctdb_vnn_iface_string(vnn)));
772 if (vnn->iface != best_iface) {
773 if (!vnn->iface->link_up) {
775 } else if (vnn->iface->references > (best_iface->references + 1)) {
776 /* only move when the rebalance gains something */
784 ctdb_vnn_unassign_iface(ctdb, vnn);
791 ret = ctdb_do_takeip(ctdb, c, vnn);
795 } else if (do_updateip) {
796 ret = ctdb_do_updateip(ctdb, c, vnn);
802 * The interface is up and the kernel known the ip
805 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn)));
812 /* tell ctdb_control.c that we will be replying asynchronously */
819 kill any clients that are registered with an IP that is being released
/*
 * Kill local clients registered for an address that is being released.
 *
 * Walks ctdb->client_ip_list; for each entry whose address matches addr
 * (ctdb_same_ip) the client is looked up through reqid_find() and, when
 * it has a non-zero pid, sent SIGKILL.
 * NOTE(review): the line that fills tmp_addr before the comparison, the
 * reqid_find() arguments and any NULL check on the returned client are
 * missing from this listing.
 */
821 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
823 struct ctdb_client_ip *ip;
825 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
826 ctdb_addr_to_str(addr)));
828 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
829 ctdb_sock_addr tmp_addr;
832 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
834 ctdb_addr_to_str(&ip->addr)));
836 if (ctdb_same_ip(&tmp_addr, addr)) {
837 struct ctdb_client *client = reqid_find(ctdb->idr,
840 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
842 ctdb_addr_to_str(&ip->addr),
845 if (client->pid != 0) {
846 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
847 (unsigned)client->pid,
848 ctdb_addr_to_str(addr),
850 kill(client->pid, SIGKILL);
/* Finish deleting a public address: unlink the VNN from ctdb->vnn,
 * release its interface and prune interfaces that no address uses any
 * more.
 * NOTE(review): whether the VNN itself is freed here is on a line
 * missing from this listing. */
856 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
858 DLIST_REMOVE(ctdb->vnn, vnn);
859 ctdb_vnn_unassign_iface(ctdb, vnn);
860 ctdb_remove_orphaned_ifaces(ctdb, vnn);
865 called when releaseip event finishes
/*
 * Completion handler for the CTDB_EVENT_RELEASE_IP script started by
 * ctdb_control_release_ip().
 *
 * private_data is a struct takeover_callback_state.  Visible behaviour:
 *  - status is tested against -ETIME;
 *  - when IP failover is enabled and do_checkpublicip is set, the kernel
 *    is asked whether the address is still configured; if so this is
 *    logged as a failure and the control is answered;
 *  - a CTDB_SRVID_RELEASE_IP message carrying the address string is sent
 *    to this node;
 *  - clients registered for the address are killed;
 *  - the VNN's interface is unassigned and, when delete_pending is set,
 *    the VNN is removed with do_delete_ip();
 *  - the control is answered with 0.
 * NOTE(review): the returns, the reply arguments on the failure path and
 * the freeing of state are on lines missing from this listing.
 */
867 static void release_ip_callback(struct ctdb_context *ctdb, int status,
870 struct takeover_callback_state *state =
871 talloc_get_type(private_data, struct takeover_callback_state);
874 if (status == -ETIME) {
878 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
879 if (ctdb_sys_have_ip(state->addr)) {
881 ("IP %s still hosted during release IP callback, failing\n",
882 ctdb_addr_to_str(state->addr)));
883 ctdb_request_control_reply(ctdb, state->c,
890 /* send a message to all clients of this node telling them
891 that the cluster has been reconfigured and they should
892 release any sockets on this IP */
893 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
894 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
895 data.dsize = strlen((char *)data.dptr)+1;
897 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
899 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
901 /* kill clients that have registered with this IP */
902 release_kill_clients(ctdb, state->addr);
904 ctdb_vnn_unassign_iface(ctdb, state->vnn);
906 /* Process the IP if it has been marked for deletion */
907 if (state->vnn->delete_pending) {
908 do_delete_ip(ctdb, state->vnn);
912 /* the control succeeded */
913 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* talloc destructor installed by ctdb_control_release_ip(): clears the
 * VNN's update_in_flight flag, tolerating a state whose vnn is NULL. */
917 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
919 if (state->vnn != NULL) {
920 state->vnn->update_in_flight = false;
926 release an ip address
/*
 * Control handler: release the public address described by indata
 * (a struct ctdb_public_ip).
 *
 * Flow visible here:
 *  - look the address up with find_public_ip_vnn(); log at INFO when it
 *    is not a public address;
 *  - free vnn->takeover_ctx, which cancels pending announcements;
 *  - when IP failover is enabled and do_checkpublicip is set and the
 *    kernel does not hold the address, log a redundant release and
 *    unassign the interface;
 *  - log a redundant release when no interface is assigned;
 *  - refuse (NOTICE log) when an update for this VNN is in flight;
 *  - take a strdup() copy of the interface name, allocate the callback
 *    state on ctdb, move the control request under it, copy the address,
 *    set update_in_flight and install the destructor;
 *  - run CTDB_EVENT_RELEASE_IP with release_ip_callback as completion
 *    handler, logging at ERR on failure;
 *  - set *async_reply so the caller knows the reply comes later.
 * NOTE(review): the remaining parameters, the returns, the assignment of
 * state->vnn and the free() of the strdup() copy are on lines missing
 * from this listing - confirm the copy is released on every path.
 */
928 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
929 struct ctdb_req_control_old *c,
934 struct takeover_callback_state *state;
935 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
936 struct ctdb_vnn *vnn;
939 /* update our vnn list */
940 vnn = find_public_ip_vnn(ctdb, &pip->addr);
942 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
943 ctdb_addr_to_str(&pip->addr)));
948 /* stop any previous arps */
949 talloc_free(vnn->takeover_ctx);
950 vnn->takeover_ctx = NULL;
952 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
953 * lazy multicast to drop an IP from any node that isn't the
954 * intended new node. The following makes ctdbd ignore
955 * a release for any address it doesn't host.
957 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
958 if (!ctdb_sys_have_ip(&pip->addr)) {
959 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
960 ctdb_addr_to_str(&pip->addr),
961 vnn->public_netmask_bits,
962 ctdb_vnn_iface_string(vnn)));
963 ctdb_vnn_unassign_iface(ctdb, vnn);
967 if (vnn->iface == NULL) {
968 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
969 ctdb_addr_to_str(&pip->addr),
970 vnn->public_netmask_bits));
975 /* There is a potential race between take_ip and us because we
976 * update the VNN via a callback that runs when the
977 * eventscripts have been run. Avoid the race by allowing one
978 * update to be in flight at a time.
980 if (vnn->update_in_flight) {
981 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
982 "update for this IP already in flight\n",
983 ctdb_addr_to_str(&vnn->public_address),
984 vnn->public_netmask_bits));
988 iface = strdup(ctdb_vnn_iface_string(vnn));
990 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
991 ctdb_addr_to_str(&pip->addr),
992 vnn->public_netmask_bits,
996 state = talloc(ctdb, struct takeover_callback_state);
998 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1004 state->c = talloc_steal(state, c);
1005 state->addr = talloc(state, ctdb_sock_addr);
1006 if (state->addr == NULL) {
1007 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1008 __FILE__, __LINE__);
1013 *state->addr = pip->addr;
1016 vnn->update_in_flight = true;
1017 talloc_set_destructor(state, ctdb_releaseip_destructor);
1019 ret = ctdb_event_script_callback(ctdb,
1020 state, release_ip_callback, state,
1021 CTDB_EVENT_RELEASE_IP,
1024 ctdb_addr_to_str(&pip->addr),
1025 vnn->public_netmask_bits);
1028 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1029 ctdb_addr_to_str(&pip->addr),
1030 ctdb_vnn_iface_string(vnn)));
1035 /* tell the control that we will be replying asynchronously */
1036 *async_reply = true;
/*
 * Add one public address to ctdb->vnn.
 *
 * addr/mask   address and netmask bits to register
 * ifaces      comma separated list of interface names that may carry it
 *
 * Steps visible here:
 *  - every name in `ifaces` is checked with ctdb_sys_check_iface_exists();
 *    a missing interface is logged at CRIT;
 *  - an address already present in ctdb->vnn is logged at CRIT;
 *  - a new VNN is allocated on ctdb with a NULL-terminated copy of the
 *    interface names, the address and the mask;
 *  - when check_address is set and the kernel already holds the address,
 *    vnn->pnn is set to this node;
 *  - each interface is registered with ctdb_add_local_iface() and the
 *    VNN is added to the list.
 * NOTE(review): the check_address parameter line, the return values, the
 * counter updates for `num` and the release of the strdup() copy used
 * for the first strtok() pass are on lines missing from this listing.
 * strtok() keeps static state and modifies its argument, hence the
 * copies.
 */
1040 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1041 ctdb_sock_addr *addr,
1042 unsigned mask, const char *ifaces,
1045 struct ctdb_vnn *vnn;
1052 tmp = strdup(ifaces);
1053 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1054 if (!ctdb_sys_check_iface_exists(iface)) {
1055 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1062 /* Verify that we don't have an entry for this ip yet */
1063 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1064 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1065 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1066 ctdb_addr_to_str(addr)));
1071 /* create a new vnn structure for this ip address */
1072 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1073 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1074 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1075 tmp = talloc_strdup(vnn, ifaces);
1076 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1077 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1078 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1079 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1080 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1081 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1085 vnn->ifaces[num] = NULL;
1086 vnn->public_address = *addr;
1087 vnn->public_netmask_bits = mask;
1089 if (check_address) {
1090 if (ctdb_sys_have_ip(addr)) {
1091 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1092 vnn->pnn = ctdb->pnn;
1096 for (i=0; vnn->ifaces[i]; i++) {
1097 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1099 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1100 "for public_address[%s]\n",
1101 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1107 DLIST_ADD(ctdb->vnn, vnn);
1113 setup the public address lists from a file
/*
 * Load the public address list from ctdb->public_addresses_file.
 *
 * The file is read with file_lines_load(); trailing empty lines are
 * trimmed.  For each remaining line, leading spaces and tabs are
 * skipped, empty lines are tested for, and the line is split on
 * space/tab with strtok().  When no interface is given on the line the
 * default public interface is used, and it is a CRIT error if none is
 * configured.  The address is parsed with parse_ip_mask() and registered
 * through ctdb_add_public_address(); check_addresses is passed through
 * to it.
 * NOTE(review): the assignments from the strtok() results, the handling
 * of empty lines, the return values and the freeing of `lines` are on
 * lines missing from this listing.
 */
1115 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1121 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1122 if (lines == NULL) {
1123 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1126 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1130 for (i=0;i<nlines;i++) {
1132 ctdb_sock_addr addr;
1133 const char *addrstr;
1138 while ((*line == ' ') || (*line == '\t')) {
1144 if (strcmp(line, "") == 0) {
1147 tok = strtok(line, " \t");
1149 tok = strtok(NULL, " \t");
1151 if (NULL == ctdb->default_public_interface) {
1152 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1157 ifaces = ctdb->default_public_interface;
1162 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1163 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1167 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1168 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
 * Configure the "single public ip" VNN.
 *
 * Allocates a VNN on ctdb with a one-entry, NULL-terminated interface
 * list, parses the address with parse_ip(), registers the interface via
 * ctdb_add_local_iface(), marks it link_up, assigns it to the VNN and
 * stores the VNN in ctdb->single_ip_vnn.
 * NOTE(review): the iface/ip parameter lines, the error returns and any
 * cleanup of svnn on failure are on lines missing from this listing.
 * The CRIT message logged when the interface cannot be found has no
 * trailing newline.
 */
1179 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1183 struct ctdb_vnn *svnn;
1184 struct ctdb_interface *cur = NULL;
1188 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1189 CTDB_NO_MEMORY(ctdb, svnn);
1191 svnn->ifaces = talloc_array(svnn, const char *, 2);
1192 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1193 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1194 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1195 svnn->ifaces[1] = NULL;
1197 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1203 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1205 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1206 "for single_ip[%s]\n",
1208 ctdb_addr_to_str(&svnn->public_address)));
1213 /* assume the single public ip interface is initially "good" */
1214 cur = ctdb_find_iface(ctdb, iface);
1216 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1219 cur->link_up = true;
1221 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1227 ctdb->single_ip_vnn = svnn;
/* Merge callback taking the new entry (parm) and an existing entry
 * (data), both struct public_ip_list.  When there is an existing entry
 * and the new one has no owner (pnn == -1), the new entry inherits the
 * existing entry's pnn.
 * NOTE(review): the return statements are missing from this listing. */
1231 static void *add_ip_callback(void *parm, void *data)
1233 struct public_ip_list *this_ip = parm;
1234 struct public_ip_list *prev_ip = data;
1236 if (prev_ip == NULL) {
1239 if (this_ip->pnn == -1) {
1240 this_ip->pnn = prev_ip->pnn;
/* Traversal callback: param is the address of a list head and data the
 * visited public_ip_list entry, whose next pointer is set to the current
 * head.
 * NOTE(review): the update of the head and the return value are on
 * lines missing from this listing. */
1246 static int getips_count_callback(void *param, void *data)
1248 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1249 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1251 new_ip->next = *ip_list;
/* Forward declaration; called from ctdb_reload_remote_public_ips() when
 * do_checkpublicip is set.
 * NOTE(review): the rest of the parameter list is missing from this
 * listing. */
1256 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1257 struct ctdb_public_ip_list *ips,
/*
 * Refresh the per-node public IP lists held in ipalloc_state.
 *
 * ipalloc_state->num is first compared with nodemap->num.  For every
 * node that is not INACTIVE, two lists are fetched with
 * ctdb_ctrl_get_public_ips_flags(): the known public IPs, and the
 * available public IPs (CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE).  Each
 * result is stored in the matching known_public_ips[j] /
 * available_public_ips[j] slot by pointing at the returned array rather
 * than copying it.  When do_checkpublicip is set the known list is
 * passed to verify_remote_ip_allocation().
 * NOTE(review): the error returns and the remaining call arguments are
 * on lines missing from this listing.
 */
1260 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1261 struct ipalloc_state *ipalloc_state,
1262 struct ctdb_node_map_old *nodemap)
1266 struct ctdb_public_ip_list_old *ip_list;
1268 if (ipalloc_state->num != nodemap->num) {
1271 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1272 ipalloc_state->num, nodemap->num));
1276 for (j=0; j<nodemap->num; j++) {
1277 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1281 /* Retrieve the list of known public IPs from the node */
1282 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1285 ipalloc_state->known_public_ips,
1290 ("Failed to read known public IPs from node: %u\n",
1294 ipalloc_state->known_public_ips[j].num = ip_list->num;
1295 /* This could be copied and freed. However, ip_list
1296 * is allocated off ipalloc_state->known_public_ips,
1297 * so this is a safe hack. This will go away in a
1298 * while anyway... */
1299 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1301 if (ctdb->do_checkpublicip) {
1302 verify_remote_ip_allocation(
1304 &ipalloc_state->known_public_ips[j],
1308 /* Retrieve the list of available public IPs from the node */
1309 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1312 ipalloc_state->available_public_ips,
1313 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1317 ("Failed to read available public IPs from node: %u\n",
1321 ipalloc_state->available_public_ips[j].num = ip_list->num;
1322 /* This could be copied and freed. However, ip_list
1323 * is allocated off ipalloc_state->available_public_ips,
1324 * so this is a safe hack. This will go away in a
1325 * while anyway... */
1326 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
/*
 * Build one list of public addresses from the per-node known lists.
 *
 * ctdb->ip_tree is freed and recreated.  For every node that is not
 * flagged DELETED, each address in that node's known_public_ips is
 * copied into a public_ip_list entry allocated on the tree.  The
 * reported pnn is only kept when the reporting node claims the address
 * for itself.  Entries are inserted into the tree keyed by ip_key(), and
 * the tree is finally traversed with getips_count_callback to produce
 * the list.
 * NOTE(review): the pnn used for addresses a node does not claim, the
 * insert callback argument and the return statement are on lines missing
 * from this listing.
 */
1332 static struct public_ip_list *
1333 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1336 struct public_ip_list *ip_list;
1337 struct ctdb_public_ip_list *public_ips;
1339 TALLOC_FREE(ctdb->ip_tree);
1340 ctdb->ip_tree = trbt_create(ctdb, 0);
1342 for (i=0; i < ctdb->num_nodes; i++) {
1344 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1348 /* there were no public ips for this node */
1349 if (ipalloc_state->known_public_ips == NULL) {
1353 public_ips = &ipalloc_state->known_public_ips[i];
1355 for (j=0; j < public_ips->num; j++) {
1356 struct public_ip_list *tmp_ip;
1358 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1359 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1360 /* Do not use information about IP addresses hosted
1361 * on other nodes, it may not be accurate */
1362 if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1363 tmp_ip->pnn = public_ips->ip[j].pnn;
1367 tmp_ip->addr = public_ips->ip[j].addr;
1368 tmp_ip->next = NULL;
1370 trbt_insertarray32_callback(ctdb->ip_tree,
1371 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1378 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
/* Scan the node map for a node that has neither NODE_FLAGS_INACTIVE nor
 * NODE_FLAGS_DISABLED set.
 * NOTE(review): the return statements are missing from this listing;
 * presumably false is returned as soon as such a node is found and true
 * otherwise - confirm. */
1383 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1387 for (i=0;i<nodemap->num;i++) {
1388 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1389 /* Found one completely healthy node */
/* Shared state for the GET_TUNABLE callbacks below.
 * NOTE(review): the `out` array and `fatal` flag used by
 * get_tunable_from_nodes() are members not visible in this listing. */
1397 struct get_tunable_callback_data {
1398 const char *tunable;
/*
 * Per-node success callback for CTDB_CONTROL_GET_TUNABLE.
 *
 * Validates that the reply is exactly sizeof(uint32_t) bytes, compares
 * pnn against the length of the cd->out array, and stores the returned
 * value in cd->out[pnn].  Problems are logged at ERR.
 * NOTE(review): the early returns, the test on `res` and any update of
 * cd->fatal are on lines missing from this listing.
 */
1403 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1404 int32_t res, TDB_DATA outdata,
1407 struct get_tunable_callback_data *cd =
1408 (struct get_tunable_callback_data *)callback;
1412 /* Already handled in fail callback */
1416 if (outdata.dsize != sizeof(uint32_t)) {
1417 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1418 cd->tunable, pnn, (int)sizeof(uint32_t),
1419 (int)outdata.dsize));
1424 size = talloc_array_length(cd->out);
1426 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1427 cd->tunable, pnn, size));
1432 cd->out[pnn] = *(uint32_t *)outdata.dptr;
/* Per-node failure callback for CTDB_CONTROL_GET_TUNABLE.  Logs one of
 * three messages: timed out, tunable not implemented on the node (at
 * WARNING), or unexpected error.
 * NOTE(review): the conditions selecting each message and any update of
 * cd->fatal are on lines missing from this listing. */
1435 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1436 int32_t res, TDB_DATA outdata,
1439 struct get_tunable_callback_data *cd =
1440 (struct get_tunable_callback_data *)callback;
1445 ("Timed out getting tunable \"%s\" from node %d\n",
1451 DEBUG(DEBUG_WARNING,
1452 ("Tunable \"%s\" not implemented on node %d\n",
1457 ("Unexpected error getting tunable \"%s\" from node %d\n",
1463 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1463 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1464 TALLOC_CTX *tmp_ctx,
1465 struct ctdb_node_map_old *nodemap,
1466 const char *tunable,
1467 uint32_t default_value)
1470 struct ctdb_control_get_tunable *t;
1473 struct get_tunable_callback_data callback_data;
1476 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1477 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1478 for (i=0; i<nodemap->num; i++) {
1479 tvals[i] = default_value;
1482 callback_data.out = tvals;
1483 callback_data.tunable = tunable;
1484 callback_data.fatal = false;
1486 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1487 data.dptr = talloc_size(tmp_ctx, data.dsize);
1488 t = (struct ctdb_control_get_tunable *)data.dptr;
1489 t->length = strlen(tunable)+1;
1490 memcpy(t->name, tunable, t->length);
1491 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1492 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1493 nodes, 0, TAKEOVER_TIMEOUT(),
1495 get_tunable_callback,
1496 get_tunable_fail_callback,
1497 &callback_data) != 0) {
1498 if (callback_data.fatal) {
1504 talloc_free(data.dptr);
/* Populate the per-node noiptakeover[] and noiphost[] flags in ipalloc_state
 * from the two tunable arrays and the node flags in nodemap. All arrays are
 * indexed by position in nodemap. The rules applied are listed below. */
1509 /* Set internal flags for IP allocation:
1511 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1512 * Set NOIPHOST ip flag for each INACTIVE node
1513 * if all nodes are disabled:
1514 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1516 * Set NOIPHOST ip flags for disabled nodes
1518 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1519 struct ctdb_node_map_old *nodemap,
1520 uint32_t *tval_noiptakeover,
1521 uint32_t *tval_noiphostonalldisabled)
1525 for (i=0;i<nodemap->num;i++) {
1526 /* Can not take IPs on node with NoIPTakeover set */
1527 if (tval_noiptakeover[i] != 0) {
1528 ipalloc_state->noiptakeover[i] = true;
1531 /* Can not host IPs on INACTIVE node */
1532 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1533 ipalloc_state->noiphost[i] = true;
/* DISABLED nodes are treated differently depending on whether every
 * node is disabled. */
1537 if (all_nodes_are_disabled(nodemap)) {
1538 /* If all nodes are disabled, can not host IPs on node
1539 * with NoIPHostOnAllDisabled set
1541 for (i=0;i<nodemap->num;i++) {
1542 if (tval_noiphostonalldisabled[i] != 0) {
1543 ipalloc_state->noiphost[i] = true;
1547 /* If some nodes are not disabled, then can not host
1548 * IPs on DISABLED node
1550 for (i=0;i<nodemap->num;i++) {
1551 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1552 ipalloc_state->noiphost[i] = true;
/* Fetch the per-node tunables needed for IP allocation and turn them into
 * the flags held in ipalloc_state (via set_ipflags_internal()).
 * Both tunable arrays are allocated with ipalloc_state as their talloc
 * parent and are freed again once the flags have been computed.
 * The caller treats a false return as failure to set the flags. */
1558 static bool set_ipflags(struct ctdb_context *ctdb,
1559 struct ipalloc_state *ipalloc_state,
1560 struct ctdb_node_map_old *nodemap)
1562 uint32_t *tval_noiptakeover;
1563 uint32_t *tval_noiphostonalldisabled;
1565 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1567 if (tval_noiptakeover == NULL) {
1571 tval_noiphostonalldisabled =
1572 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1573 "NoIPHostOnAllDisabled", 0);
1574 if (tval_noiphostonalldisabled == NULL) {
1575 /* Caller frees tmp_ctx */
1579 set_ipflags_internal(ipalloc_state, nodemap,
1581 tval_noiphostonalldisabled);
/* The raw tunable values are no longer needed once the flags are set. */
1583 talloc_free(tval_noiptakeover);
1584 talloc_free(tval_noiphostonalldisabled);
/* Allocate and initialise the state used by the IP allocation algorithm.
 *
 * The state is a talloc child of mem_ctx and owns four zeroed per-node
 * arrays sized by ctdb->num_nodes: known_public_ips, available_public_ips,
 * noiptakeover and noiphost. The algorithm and the no_ip_failback setting
 * are taken from this node's tunables.
 *
 * Returns the new state. Each allocation failure is logged, and the caller
 * checks for a NULL result.
 */
1589 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1590 TALLOC_CTX *mem_ctx)
1592 struct ipalloc_state *ipalloc_state =
1593 talloc_zero(mem_ctx, struct ipalloc_state);
1594 if (ipalloc_state == NULL) {
1595 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1599 ipalloc_state->num = ctdb->num_nodes;
1601 ipalloc_state->known_public_ips =
1602 talloc_zero_array(ipalloc_state,
1603 struct ctdb_public_ip_list,
1604 ipalloc_state->num);
1605 if (ipalloc_state->known_public_ips == NULL) {
1606 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1610 ipalloc_state->available_public_ips =
1611 talloc_zero_array(ipalloc_state,
1612 struct ctdb_public_ip_list,
1613 ipalloc_state->num);
1614 if (ipalloc_state->available_public_ips == NULL) {
1615 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1618 ipalloc_state->noiptakeover =
1619 talloc_zero_array(ipalloc_state,
1621 ipalloc_state->num);
1622 if (ipalloc_state->noiptakeover == NULL) {
1623 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1626 ipalloc_state->noiphost =
1627 talloc_zero_array(ipalloc_state,
1629 ipalloc_state->num);
1630 if (ipalloc_state->noiphost == NULL) {
1631 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
/* Algorithm precedence: LCP2 first, then deterministic, otherwise
 * non-deterministic. */
1635 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1636 ipalloc_state->algorithm = IPALLOC_LCP2;
1637 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1638 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1640 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1643 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1645 return ipalloc_state;
/* Freeing the state also releases every array allocated as its child. */
1647 talloc_free(ipalloc_state);
/* State passed to iprealloc_fail_callback(): the caller's own failure
 * callback and its argument, plus the nodemap used to check node flags.
 * ctdb_takeover_run() also fills in `retry_nodes` and `retry_count`. */
1651 struct iprealloc_callback_data {
1654 client_async_callback fail_callback;
1655 void *fail_callback_data;
1656 struct ctdb_node_map_old *nodemap;
/* Per-node failure callback for the IPREALLOCATED control.
 *
 * `callback` is a struct iprealloc_callback_data. Failures from INACTIVE
 * nodes are logged and ignored. A timeout is forwarded to the caller's own
 * fail callback when one is registered. Any other failure marks the node in
 * cd->retry_nodes[] so the caller can retry it with the EVENTSCRIPT control.
 */
1659 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1660 int32_t res, TDB_DATA outdata,
1664 struct iprealloc_callback_data *cd =
1665 (struct iprealloc_callback_data *)callback;
1667 numnodes = talloc_array_length(cd->retry_nodes);
/* Fix: valid indices are 0..numnodes-1, so pnn == numnodes must be
 * rejected too. The previous `>` test let that value through and
 * cd->retry_nodes[pnn] (and cd->nodemap->nodes[pnn]) were then accessed
 * one element past the end. */
1668 if (pnn >= numnodes) {
1670 ("ipreallocated failure from node %d, "
1671 "but only %d nodes in nodemap\n",
1676 /* Can't run the "ipreallocated" event on a INACTIVE node */
1677 if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1678 DEBUG(DEBUG_WARNING,
1679 ("ipreallocated failed on inactive node %d, ignoring\n",
1686 /* If the control timed out then that's a real error,
1687 * so call the real fail callback
1689 if (cd->fail_callback) {
1690 cd->fail_callback(ctdb, pnn, res, outdata,
1691 cd->fail_callback_data);
1693 DEBUG(DEBUG_WARNING,
1694 ("iprealloc timed out but no callback registered\n"));
1698 /* If not a timeout then either the ipreallocated
1699 * eventscript (or some setup) failed. This might
1700 * have failed because the IPREALLOCATED control isn't
1701 * implemented - right now there is no way of knowing
1702 * because the error codes are all folded down to -1.
1703 * Consider retrying using EVENTSCRIPT control...
1705 DEBUG(DEBUG_WARNING,
1706 ("ipreallocated failure from node %d, flagging retry\n",
1708 cd->retry_nodes[pnn] = true;
/* State passed to takeover_run_fail_callback(): the caller's failure
 * callback and its argument, plus the nodemap used to map a PNN to an index.
 * ctdb_takeover_run() also allocates a per-node `node_failed` array. */
1713 struct takeover_callback_data {
1715 client_async_callback fail_callback;
1716 void *fail_callback_data;
1717 struct ctdb_node_map_old *nodemap;
/* Failure callback installed by ctdb_takeover_run() for the RELEASE_IP
 * controls. callback_data must be a struct takeover_callback_data
 * (talloc_get_type_abort() aborts otherwise).
 *
 * Many RELEASE_IP controls can go to the same node, so only the first
 * failure per node is forwarded to the caller's fail callback; node_failed[]
 * remembers which nodes have already been reported.
 * NOTE(review): cd->fail_callback is called without a NULL check, unlike
 * iprealloc_fail_callback() - confirm callers always supply one. */
1720 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1721 uint32_t node_pnn, int32_t res,
1722 TDB_DATA outdata, void *callback_data)
1724 struct takeover_callback_data *cd =
1725 talloc_get_type_abort(callback_data,
1726 struct takeover_callback_data);
/* Find the nodemap index whose PNN matches the failing node. */
1729 for (i = 0; i < cd->nodemap->num; i++) {
1730 if (node_pnn == cd->nodemap->nodes[i].pnn) {
/* Reaching the end of the nodemap means the PNN is unknown. */
1735 if (i == cd->nodemap->num) {
1736 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1740 if (!cd->node_failed[i]) {
1741 cd->node_failed[i] = true;
1742 cd->fail_callback(ctdb, node_pnn, res, outdata,
1743 cd->fail_callback_data);
1748 * Recalculate the allocation of public IPs to nodes and have the
1749 * nodes host their allocated addresses.
1751 * - Allocate memory for IP allocation state, including per node
1753 * - Populate IP allocation algorithm in IP allocation state
1754 * - Populate local value of tunable NoIPFailback in IP allocation
1755 state - this is really a cluster-wide configuration variable and
1756 only the value from the master node is used
1757 * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1758 * connected nodes - this is done separately so tunable values can
1759 * be faked in unit testing
1760 * - Populate NoIPTakeover tunable in IP allocation state
1761 * - Populate NoIPHost in IP allocation state, derived from node flags
1762 * and NoIPHostOnAllDisabled tunable
1763 * - Retrieve and populate known and available IP lists in IP
1765 * - If no available IP addresses then early exit
1766 * - Build list of (known IPs, currently assigned node)
1767 * - Populate list of nodes to force rebalance - internal structure,
1768 * currently no way to fetch, only used by LCP2 for nodes that have
1769 * had new IP addresses added
1770 * - Run IP allocation algorithm
1771 * - Send RELEASE_IP to all nodes for IPs they should not host
1772 * - Send TAKE_IP to all nodes for IPs they should host
1773 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
/* Parameters:
 *   nodemap               - nodes to consider; DISCONNECTED nodes are not
 *                           sent RELEASE_IP
 *   force_rebalance_nodes - stored in the allocation state for the
 *                           allocation algorithm
 *   fail_callback,
 *   callback_data         - invoked for nodes whose controls fail
 * All temporary allocations hang off a talloc context that is freed before
 * returning. */
1775 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1776 uint32_t *force_rebalance_nodes,
1777 client_async_callback fail_callback, void *callback_data)
1780 struct ctdb_public_ip ip;
1782 struct public_ip_list *all_ips, *tmp_ip;
1784 struct timeval timeout;
1785 struct client_async_data *async_data;
1786 struct ctdb_client_control_state *state;
1787 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1788 struct ipalloc_state *ipalloc_state;
1789 struct takeover_callback_data *takeover_data;
1790 struct iprealloc_callback_data iprealloc_data;
1795 * ip failover is completely disabled, just send out the
1796 * ipreallocated event.
1798 if (ctdb->tunable.disable_ip_failover != 0) {
1802 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1803 if (ipalloc_state == NULL) {
1804 talloc_free(tmp_ctx);
1808 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1809 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1810 talloc_free(tmp_ctx);
1814 /* Fetch known/available public IPs from each active node */
1815 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1817 talloc_free(tmp_ctx);
1821 /* Short-circuit IP allocation if no node has available IPs */
1822 can_host_ips = false;
1823 for (i=0; i < ipalloc_state->num; i++) {
1824 if (ipalloc_state->available_public_ips[i].num != 0) {
1825 can_host_ips = true;
1828 if (!can_host_ips) {
1829 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1833 /* since nodes only know about those public addresses that
1834 can be served by that particular node, no single node has
1835 a full list of all public addresses that exist in the cluster.
1836 Walk over all node structures and create a merged list of
1837 all public addresses that exist in the cluster.
1839 keep the tree of ips around as ctdb->ip_tree
1841 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1842 ipalloc_state->all_ips = all_ips;
1844 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1846 /* Do the IP reassignment calculations */
1847 ipalloc(ipalloc_state);
1849 /* Now tell all nodes to release any public IPs they should not
1850 * host. This will be a NOOP on nodes that don't currently
1851 * hold the given IP.
1853 takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1854 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1856 takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1857 bool, nodemap->num);
1858 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1859 takeover_data->fail_callback = fail_callback;
1860 takeover_data->fail_callback_data = callback_data;
1861 takeover_data->nodemap = nodemap;
1863 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1864 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1866 async_data->fail_callback = takeover_run_fail_callback;
1867 async_data->callback_data = takeover_data;
1869 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1871 /* Send a RELEASE_IP to all nodes that should not be hosting
1872 * each IP. For each IP, all but one of these will be
1873 * redundant. However, the redundant ones are used to tell
1874 * nodes which node should be hosting the IP so that commands
1875 * like "ctdb ip" can display a particular nodes idea of who
1876 * is hosting what. */
1877 for (i=0;i<nodemap->num;i++) {
1878 /* don't talk to unconnected nodes, but do talk to banned nodes */
1879 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1883 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1884 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1885 /* This node should be serving this
1886 vnn so don't tell it to release the ip
1890 ip.pnn = tmp_ip->pnn;
1891 ip.addr = tmp_ip->addr;
1893 timeout = TAKEOVER_TIMEOUT();
1894 data.dsize = sizeof(ip);
1895 data.dptr = (uint8_t *)&ip;
1896 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1897 0, CTDB_CONTROL_RELEASE_IP, 0,
1900 if (state == NULL) {
1901 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1902 talloc_free(tmp_ctx);
1906 ctdb_client_async_add(async_data, state);
1909 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1910 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1911 talloc_free(tmp_ctx);
1914 talloc_free(async_data);
1917 /* For each IP, send a TAKEOVER_IP to the node that should be
1918 * hosting it. Many of these will often be redundant (since
1919 * the allocation won't have changed) but they can be useful
1920 * to recover from inconsistencies. */
1921 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1922 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1924 async_data->fail_callback = fail_callback;
1925 async_data->callback_data = callback_data;
1927 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1928 if (tmp_ip->pnn == -1) {
1929 /* this IP won't be taken over */
1933 ip.pnn = tmp_ip->pnn;
1934 ip.addr = tmp_ip->addr;
1936 timeout = TAKEOVER_TIMEOUT();
1937 data.dsize = sizeof(ip);
1938 data.dptr = (uint8_t *)&ip;
1939 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1940 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1941 data, async_data, &timeout, NULL);
1942 if (state == NULL) {
1943 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1944 talloc_free(tmp_ctx);
1948 ctdb_client_async_add(async_data, state);
1950 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1951 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1952 talloc_free(tmp_ctx);
1958 * Tell all nodes to run eventscripts to process the
1959 * "ipreallocated" event. This can do a lot of things,
1960 * including restarting services to reconfigure them if public
1961 * IPs have moved. Once upon a time this event only used to
1964 retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1965 CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1966 iprealloc_data.retry_nodes = retry_data;
1967 iprealloc_data.retry_count = 0;
1968 iprealloc_data.fail_callback = fail_callback;
1969 iprealloc_data.fail_callback_data = callback_data;
1970 iprealloc_data.nodemap = nodemap;
1972 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1973 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1974 nodes, 0, TAKEOVER_TIMEOUT(),
1976 NULL, iprealloc_fail_callback,
1979 /* If the control failed then we should retry to any
1980 * nodes flagged by iprealloc_fail_callback using the
1981 * EVENTSCRIPT control. This is a best-effort at
1982 * backward compatibility when running a mixed cluster
1983 * where some nodes have not yet been upgraded to
1984 * support the IPREALLOCATED control.
1986 DEBUG(DEBUG_WARNING,
1987 ("Retry ipreallocated to some nodes using eventscript control\n"));
1989 nodes = talloc_array(tmp_ctx, uint32_t,
1990 iprealloc_data.retry_count);
1991 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1994 for (i=0; i<nodemap->num; i++) {
1995 if (iprealloc_data.retry_nodes[i]) {
2001 data.dptr = discard_const("ipreallocated");
2002 data.dsize = strlen((char *)data.dptr) + 1;
2003 ret = ctdb_client_async_control(ctdb,
2004 CTDB_CONTROL_RUN_EVENTSCRIPTS,
2005 nodes, 0, TAKEOVER_TIMEOUT(),
2007 NULL, fail_callback,
2010 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
2014 talloc_free(tmp_ctx);
/* talloc destructor for struct ctdb_client_ip (registered in
 * ctdb_control_tcp_client()): logs the address being dropped and unlinks the
 * entry from ctdb->client_ip_list. */
2020 destroy a ctdb_client_ip structure
2022 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2024 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2025 ctdb_addr_to_str(&ip->addr),
2026 ntohs(ip->addr.ip.sin_port),
2029 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
/* Control handler: a local client registers a TCP connection (a
 * struct ctdb_connection carried in indata) to be tickled after IP takeover.
 *
 * Steps visible here:
 *  - do nothing useful when this node has no public IPs;
 *  - canonicalize both endpoint addresses in place;
 *  - look up the public address (vnn) for the destination and log when it
 *    is not one of ours;
 *  - refuse when the public IP is not currently held by this node;
 *  - record the connection on the client (a ctdb_client_ip and a
 *    ctdb_tcp_list entry, both talloc children of the client);
 *  - broadcast CTDB_CONTROL_TCP_ADD to all connected nodes.
 * NOTE(review): `client` comes from reqid_find() and is dereferenced
 * (client->pid, talloc parent) with no NULL check visible here - confirm. */
2034 called by a client to inform us of a TCP connection that it is managing
2035 that should tickled with an ACK when IP takeover is done
2037 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2040 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2041 struct ctdb_connection *tcp_sock = NULL;
2042 struct ctdb_tcp_list *tcp;
2043 struct ctdb_connection t;
2046 struct ctdb_client_ip *ip;
2047 struct ctdb_vnn *vnn;
2048 ctdb_sock_addr addr;
2050 /* If we don't have public IPs, tickles are useless */
2051 if (ctdb->vnn == NULL) {
2055 tcp_sock = (struct ctdb_connection *)indata.dptr;
/* Canonicalize via a copy so source and destination of the conversion
 * do not alias; the result is written back into the request buffer. */
2057 addr = tcp_sock->src;
2058 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2059 addr = tcp_sock->dst;
2060 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2063 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2064 vnn = find_public_ip_vnn(ctdb, &addr);
2066 switch (addr.sa.sa_family) {
2068 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2069 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2070 ctdb_addr_to_str(&addr)));
2074 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2075 ctdb_addr_to_str(&addr)));
2078 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2084 if (vnn->pnn != ctdb->pnn) {
2085 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2086 ctdb_addr_to_str(&addr),
2087 client_id, client->pid));
2088 /* failing this call will tell smbd to die */
2092 ip = talloc(client, struct ctdb_client_ip);
2093 CTDB_NO_MEMORY(ctdb, ip);
2097 ip->client_id = client_id;
2098 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2099 DLIST_ADD(ctdb->client_ip_list, ip);
2101 tcp = talloc(client, struct ctdb_tcp_list);
2102 CTDB_NO_MEMORY(ctdb, tcp);
2104 tcp->connection.src = tcp_sock->src;
2105 tcp->connection.dst = tcp_sock->dst;
2107 DLIST_ADD(client->tcp_list, tcp);
/* Payload for the TCP_ADD broadcast: the canonicalized endpoint pair. */
2109 t.src = tcp_sock->src;
2110 t.dst = tcp_sock->dst;
2112 data.dptr = (uint8_t *)&t;
2113 data.dsize = sizeof(t);
2115 switch (addr.sa.sa_family) {
2117 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2118 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2119 ctdb_addr_to_str(&tcp_sock->src),
2120 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2123 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2124 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2125 ctdb_addr_to_str(&tcp_sock->src),
2126 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2129 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2133 /* tell all nodes about this tcp connection */
2134 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2135 CTDB_CONTROL_TCP_ADD,
2136 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2138 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
/* Linear search of `array` for a connection whose source and destination
 * both match `tcp`. Returns a pointer to the matching element inside the
 * array; a NULL array is tolerated. */
2146 find a tcp address on a list
2148 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2149 struct ctdb_connection *tcp)
2153 if (array == NULL) {
2157 for (i=0;i<array->num;i++) {
2158 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2159 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2160 return &array->connections[i];
/* Control handler: add the connection in indata (a struct ctdb_connection)
 * to the tickle array of the public address it targets (p->dst).
 *
 * The array is created on first use and grown by one entry for each new
 * connection; a connection that is already present is only logged.
 * When tcp_update_needed is true, vnn->tcp_update_needed is set after a
 * connection has been added. */
2169 called by a daemon to inform us of a TCP connection that one of its
2170 clients managing that should tickled with an ACK when IP takeover is
2173 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2175 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2176 struct ctdb_tcp_array *tcparray;
2177 struct ctdb_connection tcp;
2178 struct ctdb_vnn *vnn;
2180 /* If we don't have public IPs, tickles are useless */
2181 if (ctdb->vnn == NULL) {
2185 vnn = find_public_ip_vnn(ctdb, &p->dst);
2187 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2188 ctdb_addr_to_str(&p->dst)));
2194 tcparray = vnn->tcp_array;
2196 /* If this is the first tickle */
2197 if (tcparray == NULL) {
2198 tcparray = talloc(vnn, struct ctdb_tcp_array);
2199 CTDB_NO_MEMORY(ctdb, tcparray);
2200 vnn->tcp_array = tcparray;
2203 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2204 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2206 tcparray->connections[tcparray->num].src = p->src;
2207 tcparray->connections[tcparray->num].dst = p->dst;
2210 if (tcp_update_needed) {
2211 vnn->tcp_update_needed = true;
2217 /* Do we already have this tickle ?*/
2220 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2221 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2222 ctdb_addr_to_str(&tcp.dst),
2223 ntohs(tcp.dst.ip.sin_port),
2228 /* A new tickle, we must add it to the array */
2229 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2230 struct ctdb_connection,
2232 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2234 tcparray->connections[tcparray->num].src = p->src;
2235 tcparray->connections[tcparray->num].dst = p->dst;
2238 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2239 ctdb_addr_to_str(&tcp.dst),
2240 ntohs(tcp.dst.ip.sin_port),
2243 if (tcp_update_needed) {
2244 vnn->tcp_update_needed = true;
/* Remove `conn` from the tickle array of `vnn`.
 *
 * A missing array or an unknown connection is only logged. Otherwise the
 * entry is overwritten with the last array element, the count is reduced,
 * an emptied array is freed, and vnn->tcp_update_needed is set. */
2251 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2253 struct ctdb_connection *tcpp;
2259 /* if the array is empty we can't remove it
2260 and we don't need to do anything
2262 if (vnn->tcp_array == NULL) {
2263 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2264 ctdb_addr_to_str(&conn->dst),
2265 ntohs(conn->dst.ip.sin_port)));
2270 /* See if we know this connection
2271 if we don't know this connection then we don't need to do anything
2273 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2275 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2276 ctdb_addr_to_str(&conn->dst),
2277 ntohs(conn->dst.ip.sin_port)));
2282 /* We need to remove this entry from the array.
2283 Instead of allocating a new array and copying data to it
2284 we cheat and just copy the last entry in the existing array
2285 to the entry that is to be removed and just shrink the
2288 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2289 vnn->tcp_array->num--;
2291 /* If we deleted the last entry we also need to remove the entire array
2293 if (vnn->tcp_array->num == 0) {
2294 talloc_free(vnn->tcp_array);
2295 vnn->tcp_array = NULL;
2298 vnn->tcp_update_needed = true;
2300 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2301 ctdb_addr_to_str(&conn->src),
2302 ntohs(conn->src.ip.sin_port)));
/* Control handler: remove the connection in indata (a
 * struct ctdb_connection) from the tickle array of the public address it
 * targets. An address that is not a known public address is logged. */
2307 called by a daemon to inform us of a TCP connection that one of its
2308 clients used are no longer needed in the tickle database
2310 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2312 struct ctdb_vnn *vnn;
2313 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2315 /* If we don't have public IPs, tickles are useless */
2316 if (ctdb->vnn == NULL) {
2320 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2323 (__location__ " unable to find public address %s\n",
2324 ctdb_addr_to_str(&conn->dst)));
2328 ctdb_remove_connection(vnn, conn);
/* Control handler for a node announcing that it has started.
 * `pnn` is only used in the log message; the handler marks every public
 * address on this node as needing a tickle update. */
2335 Called when another daemon starts - causes all tickles for all
2336 public addresses we are serving to be sent to the new node on the
2337 next check. This actually causes the next scheduled call to
2338 tdb_update_tcp_tickles() to update all nodes. This is simple and
2339 doesn't require careful error handling.
2341 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2343 struct ctdb_vnn *vnn;
2345 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2346 (unsigned long) pnn));
2348 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2349 vnn->tcp_update_needed = true;
/* Drain client->tcp_list when a client goes away. Each entry is unlinked
 * from the list, and its connection is removed from the tickle array only
 * when the public address it targets is hosted on this node. */
2357 called when a client structure goes away - hook to remove
2358 elements from the tcp_list in all daemons
2360 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2362 while (client->tcp_list) {
2363 struct ctdb_vnn *vnn;
2364 struct ctdb_tcp_list *tcp = client->tcp_list;
2365 struct ctdb_connection *conn = &tcp->connection;
2367 DLIST_REMOVE(client->tcp_list, tcp);
2369 vnn = find_public_ip_vnn(client->ctdb,
2373 (__location__ " unable to find public address %s\n",
2374 ctdb_addr_to_str(&conn->dst)));
2378 /* If the IP address is hosted on this node then
2379 * remove the connection. */
2380 if (vnn->pnn == client->ctdb->pnn) {
2381 ctdb_remove_connection(vnn, conn);
2384 /* Otherwise this function has been called because the
2385 * server IP address has been released to another node
2386 * and the client has exited. This means that we
2387 * should not delete the connection information. The
2388 * takeover node processes connections too. */
/* Release every public IP configured on this node.
 *
 * For an address the system does not actually have, only the interface
 * assignment is dropped. For the others the "releaseip" event is run,
 * clients using the address are killed and the interface is unassigned.
 * An address with an update already in flight is skipped with a warning.
 * The number of released addresses is logged at the end. */
2393 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2395 struct ctdb_vnn *vnn;
2398 if (ctdb->tunable.disable_ip_failover == 1) {
2402 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2403 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2404 ctdb_vnn_unassign_iface(ctdb, vnn);
2411 /* Don't allow multiple releases at once. Some code,
2412 * particularly ctdb_tickle_sentenced_connections() is
2414 if (vnn->update_in_flight) {
2415 DEBUG(DEBUG_WARNING,
2417 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2418 ctdb_addr_to_str(&vnn->public_address),
2419 vnn->public_netmask_bits,
2420 ctdb_vnn_iface_string(vnn)));
2423 vnn->update_in_flight = true;
2425 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2426 ctdb_addr_to_str(&vnn->public_address),
2427 vnn->public_netmask_bits,
2428 ctdb_vnn_iface_string(vnn)));
2430 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2431 ctdb_vnn_iface_string(vnn),
2432 ctdb_addr_to_str(&vnn->public_address),
2433 vnn->public_netmask_bits);
2434 release_kill_clients(ctdb, &vnn->public_address);
2435 ctdb_vnn_unassign_iface(ctdb, vnn);
2436 vnn->update_in_flight = false;
2440 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
/* Control handler: return this node's public IPs in outdata as a
 * struct ctdb_public_ip_list_old (talloc child of outdata).
 *
 * With CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE set in c->flags, addresses that
 * ctdb_vnn_available() rejects are left out. The buffer is sized for every
 * address, then the reported length is recomputed from the number of
 * entries actually filled in. */
2445 get list of public IPs
2447 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2448 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2451 struct ctdb_public_ip_list_old *ips;
2452 struct ctdb_vnn *vnn;
2453 bool only_available = false;
2455 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2456 only_available = true;
2459 /* count how many public ip structures we have */
2461 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2465 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2466 num*sizeof(struct ctdb_public_ip);
2467 ips = talloc_zero_size(outdata, len);
2468 CTDB_NO_MEMORY(ctdb, ips);
2471 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2472 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2475 ips->ips[i].pnn = vnn->pnn;
2476 ips->ips[i].addr = vnn->public_address;
2480 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2481 i*sizeof(struct ctdb_public_ip);
2483 outdata->dsize = len;
2484 outdata->dptr = (uint8_t *)ips;
/* Control handler: describe one public address (a ctdb_sock_addr carried in
 * indata) and the interfaces it may use.
 *
 * The reply in outdata is a struct ctdb_public_ip_info_old (talloc child of
 * outdata) with the address, its current PNN, one entry per configured
 * interface, and active_idx set to the index of the interface currently
 * assigned (0xFFFFFFFF when none matches). The "single ip" address is
 * accepted as a fallback when the address is not a regular public IP. */
2490 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2491 struct ctdb_req_control_old *c,
2496 ctdb_sock_addr *addr;
2497 struct ctdb_public_ip_info_old *info;
2498 struct ctdb_vnn *vnn;
2500 addr = (ctdb_sock_addr *)indata.dptr;
2502 vnn = find_public_ip_vnn(ctdb, addr);
2504 /* if it is not a public ip it could be our 'single ip' */
2505 if (ctdb->single_ip_vnn) {
2506 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2507 vnn = ctdb->single_ip_vnn;
2512 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2513 "'%s'not a public address\n",
2514 ctdb_addr_to_str(addr)));
2518 /* count how many public ip structures we have */
2520 for (;vnn->ifaces[num];) {
2524 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2525 num*sizeof(struct ctdb_iface);
2526 info = talloc_zero_size(outdata, len);
2527 CTDB_NO_MEMORY(ctdb, info);
2529 info->ip.addr = vnn->public_address;
2530 info->ip.pnn = vnn->pnn;
2531 info->active_idx = 0xFFFFFFFF;
2533 for (i=0; vnn->ifaces[i]; i++) {
2534 struct ctdb_interface *cur;
2536 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2538 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2542 if (vnn->iface == cur) {
2543 info->active_idx = i;
/* The buffer was zero-filled, so copying at most size-1 bytes leaves
 * the name NUL-terminated. */
2545 strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
2546 info->ifaces[i].link_state = cur->link_up;
2547 info->ifaces[i].references = cur->references;
2550 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2551 i*sizeof(struct ctdb_iface);
2553 outdata->dsize = len;
2554 outdata->dptr = (uint8_t *)info;
/* Control handler: return every interface known to this node.
 *
 * The reply in outdata is a struct ctdb_iface_list_old (talloc child of
 * outdata) holding the name, link state and reference count of each entry
 * on ctdb->ifaces. */
2559 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2560 struct ctdb_req_control_old *c,
2564 struct ctdb_iface_list_old *ifaces;
2565 struct ctdb_interface *cur;
2567 /* count how many public ip structures we have */
2569 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2573 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2574 num*sizeof(struct ctdb_iface);
2575 ifaces = talloc_zero_size(outdata, len);
2576 CTDB_NO_MEMORY(ctdb, ifaces);
2579 for (cur=ctdb->ifaces;cur;cur=cur->next) {
/* Fix: bound the copy to the fixed-size name field, as
 * ctdb_control_get_public_ip_info() already does; the unbounded strcpy()
 * could overrun it. The buffer is zero-filled, so copying at most size-1
 * bytes keeps the name NUL-terminated. */
2580 strncpy(ifaces->ifaces[i].name, cur->name, sizeof(ifaces->ifaces[i].name)-1);
2581 ifaces->ifaces[i].link_state = cur->link_up;
2582 ifaces->ifaces[i].references = cur->references;
2586 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2587 i*sizeof(struct ctdb_iface);
2589 outdata->dsize = len;
2590 outdata->dptr = (uint8_t *)ifaces;
/* Control handler: set the link state of a named interface from a
 * struct ctdb_iface carried in indata.
 *
 * The request is validated first: the name must be NUL-terminated at index
 * CTDB_IFACE_SIZE, link_state must be a recognised value, and references
 * must be 0. The interface is then looked up by name; when the requested
 * state differs from the current one the change is logged and stored. */
2595 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2596 struct ctdb_req_control_old *c,
2599 struct ctdb_iface *info;
2600 struct ctdb_interface *iface;
2601 bool link_up = false;
2603 info = (struct ctdb_iface *)indata.dptr;
2605 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2606 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2607 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2608 len, len, info->name));
2612 switch (info->link_state) {
2620 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2621 (unsigned int)info->link_state));
2625 if (info->references != 0) {
2626 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2627 (unsigned int)info->references));
2631 iface = ctdb_find_iface(ctdb, info->name);
2632 if (iface == NULL) {
2636 if (link_up == iface->link_up) {
/* A link that was up (and is now going down) is logged at error level;
 * a link coming up is logged as a notice. */
2640 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2641 ("iface[%s] has changed it's link status %s => %s\n",
2643 iface->link_up?"up":"down",
2644 link_up?"up":"down"));
2646 iface->link_up = link_up;
/* Per-public-address state for killing TCP connections: back-pointers to
 * the vnn and the ctdb context, the tevent fd handler used for captured
 * packets, and a tree of the connections still to be killed (keyed by
 * killtcp_key()). capture_tcp_handler() also reads `capture_fd` and
 * `private_data` members. */
2652 structure containing the listening socket and the list of tcp connections
2653 that the ctdb daemon is to kill
2655 struct ctdb_kill_tcp {
2656 struct ctdb_vnn *vnn;
2657 struct ctdb_context *ctdb;
2659 struct tevent_fd *fde;
2660 trbt_tree_t *connections;
/* One connection queued for killing: its two endpoints and a back-pointer
 * to the owning ctdb_kill_tcp. tickle_connection_traverse() also reads a
 * `count` member. */
2665 a tcp connection that is to be killed
2667 struct ctdb_killtcp_con {
2668 ctdb_sock_addr src_addr;
2669 ctdb_sock_addr dst_addr;
2671 struct ctdb_kill_tcp *killtcp;
/* Build the KILLTCP_KEYLEN-word lookup key for a (src, dst) socket pair.
 *
 * The key lives in a single function-local static buffer that is zeroed on
 * every call, so each call overwrites the previous key and the function is
 * not reentrant. IPv4 pairs fill words 0-3 (addresses, then ports); IPv6
 * pairs fill all ten words. Mismatched or unknown address families are
 * logged as errors. */
2674 /* this function is used to create a key to represent this socketpair
2675 in the killtcp tree.
2676 this key is used to insert and lookup matching socketpairs that are
2677 to be tickled and RST
2679 #define KILLTCP_KEYLEN 10
2680 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2682 static uint32_t key[KILLTCP_KEYLEN];
2684 bzero(key, sizeof(key));
2686 if (src->sa.sa_family != dst->sa.sa_family) {
2687 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2691 switch (src->sa.sa_family) {
2693 key[0] = dst->ip.sin_addr.s_addr;
2694 key[1] = src->ip.sin_addr.s_addr;
2695 key[2] = dst->ip.sin_port;
2696 key[3] = src->ip.sin_port;
2699 uint32_t *dst6_addr32 =
2700 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
2701 uint32_t *src6_addr32 =
2702 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
2703 key[0] = dst6_addr32[3];
2704 key[1] = src6_addr32[3];
2705 key[2] = dst6_addr32[2];
2706 key[3] = src6_addr32[2];
2707 key[4] = dst6_addr32[1];
2708 key[5] = src6_addr32[1];
2709 key[6] = dst6_addr32[0];
2710 key[7] = src6_addr32[0];
2711 key[8] = dst->ip6.sin6_port;
2712 key[9] = src->ip6.sin6_port;
2716 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
/* tevent fd handler for the capture socket; private_data is the
 * struct ctdb_kill_tcp.
 *
 * It reads one TCP packet, looks its (src, dst) pair up in the tree of
 * connections to kill, and for a match sends a TCP packet back using the
 * captured sequence numbers. Packets that cannot be parsed or do not match
 * a tracked connection are ignored.
 * NOTE(review): the final argument 1 to ctdb_sys_send_tcp() is presumably
 * the RST flag, going by the log message - confirm. */
2724 called when we get a read event on the raw socket
2726 static void capture_tcp_handler(struct tevent_context *ev,
2727 struct tevent_fd *fde,
2728 uint16_t flags, void *private_data)
2730 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2731 struct ctdb_killtcp_con *con;
2732 ctdb_sock_addr src, dst;
2733 uint32_t ack_seq, seq;
2735 if (!(flags & TEVENT_FD_READ)) {
2739 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2740 killtcp->private_data,
2742 &ack_seq, &seq) != 0) {
2743 /* probably a non-tcp ACK packet */
2747 /* check if we have this guy in our list of connections
2750 con = trbt_lookuparray32(killtcp->connections,
2751 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2753 /* no this was some other packet we can just ignore */
2757 /* This one has been tickled !
2758 now reset him and remove him from the list.
2760 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2761 ntohs(con->dst_addr.ip.sin_port),
2762 ctdb_addr_to_str(&con->src_addr),
2763 ntohs(con->src_addr.ip.sin_port)));
2765 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2770 /* when traversing the list of all tcp connections to send tickle acks to
2771 (so that we can capture the ack coming back and kill the connection
2773 this callback is called for each connection we are currently trying to kill
2775 static int tickle_connection_traverse(void *param, void *data)
2777 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2779 /* have tried too many times, just give up */
2780 if (con->count >= 5) {
2781 /* can't delete in traverse: reparent to delete_cons */
2782 talloc_steal(param, con);
2786 /* othervise, try tickling it again */
2789 (ctdb_sock_addr *)&con->dst_addr,
2790 (ctdb_sock_addr *)&con->src_addr,
2797 called every second until all sentenced connections have been reset
2799 static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
2800 struct tevent_timer *te,
2801 struct timeval t, void *private_data)
2803 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2804 void *delete_cons = talloc_new(NULL);
2806 /* loop over all connections sending tickle ACKs */
2807 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2809 /* now we've finished traverse, it's safe to do deletion. */
2810 talloc_free(delete_cons);
2812 /* If there are no more connections to kill we can remove the
2813 entire killtcp structure
2815 if ( (killtcp->connections == NULL) ||
2816 (killtcp->connections->root == NULL) ) {
2817 talloc_free(killtcp);
2821 /* try tickling them again in a seconds time
2823 tevent_add_timer(killtcp->ctdb->ev, killtcp,
2824 timeval_current_ofs(1, 0),
2825 ctdb_tickle_sentenced_connections, killtcp);
2829 destroy the killtcp structure
2831 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2833 struct ctdb_vnn *tmpvnn;
2835 /* verify that this vnn is still active */
2836 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2837 if (tmpvnn == killtcp->vnn) {
2842 if (tmpvnn == NULL) {
2846 if (killtcp->vnn->killtcp != killtcp) {
2850 killtcp->vnn->killtcp = NULL;
/*
  insert callback for the killtcp tree: the new entry always wins.

  'parm' is the new connection structure and 'data' is whatever was
  stored under the same key before (if anything).  The old entry is
  deliberately not freed here.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
2869 add a tcp socket to the list of connections we want to RST
2871 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2875 ctdb_sock_addr src, dst;
2876 struct ctdb_kill_tcp *killtcp;
2877 struct ctdb_killtcp_con *con;
2878 struct ctdb_vnn *vnn;
2880 ctdb_canonicalize_ip(s, &src);
2881 ctdb_canonicalize_ip(d, &dst);
2883 vnn = find_public_ip_vnn(ctdb, &dst);
2885 vnn = find_public_ip_vnn(ctdb, &src);
2888 /* if it is not a public ip it could be our 'single ip' */
2889 if (ctdb->single_ip_vnn) {
2890 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2891 vnn = ctdb->single_ip_vnn;
2896 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2900 killtcp = vnn->killtcp;
2902 /* If this is the first connection to kill we must allocate
2905 if (killtcp == NULL) {
2906 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
2907 CTDB_NO_MEMORY(ctdb, killtcp);
2910 killtcp->ctdb = ctdb;
2911 killtcp->capture_fd = -1;
2912 killtcp->connections = trbt_create(killtcp, 0);
2914 vnn->killtcp = killtcp;
2915 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2920 /* create a structure that describes this connection we want to
2921 RST and store it in killtcp->connections
2923 con = talloc(killtcp, struct ctdb_killtcp_con);
2924 CTDB_NO_MEMORY(ctdb, con);
2925 con->src_addr = src;
2926 con->dst_addr = dst;
2928 con->killtcp = killtcp;
2931 trbt_insertarray32_callback(killtcp->connections,
2932 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2933 add_killtcp_callback, con);
2936 If we don't have a socket to listen on yet we must create it
2938 if (killtcp->capture_fd == -1) {
2939 const char *iface = ctdb_vnn_iface_string(vnn);
2940 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2941 if (killtcp->capture_fd == -1) {
2942 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2943 "socket on iface '%s' for killtcp (%s)\n",
2944 iface, strerror(errno)));
2950 if (killtcp->fde == NULL) {
2951 killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
2952 killtcp->capture_fd,
2954 capture_tcp_handler, killtcp);
2955 tevent_fd_set_auto_close(killtcp->fde);
2957 /* We also need to set up some events to tickle all these connections
2958 until they are all reset
2960 tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2961 ctdb_tickle_sentenced_connections, killtcp);
2964 /* tickle him once now */
2973 talloc_free(vnn->killtcp);
2974 vnn->killtcp = NULL;
2979 kill a TCP connection.
2981 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2983 struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
2985 return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
2989 called by a daemon to inform us of the entire list of TCP tickles for
2990 a particular public address.
2991 this control should only be sent by the node that is currently serving
2992 that public address.
2994 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2996 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2997 struct ctdb_tcp_array *tcparray;
2998 struct ctdb_vnn *vnn;
3000 /* We must at least have tickles.num or else we cant verify the size
3001 of the received data blob
3003 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
3004 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
3008 /* verify that the size of data matches what we expect */
3009 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
3010 + sizeof(struct ctdb_connection) * list->num) {
3011 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
3015 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
3016 ctdb_addr_to_str(&list->addr)));
3018 vnn = find_public_ip_vnn(ctdb, &list->addr);
3020 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3021 ctdb_addr_to_str(&list->addr)));
3026 if (vnn->pnn == ctdb->pnn) {
3028 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
3029 ctdb_addr_to_str(&list->addr)));
3033 /* remove any old ticklelist we might have */
3034 talloc_free(vnn->tcp_array);
3035 vnn->tcp_array = NULL;
3037 tcparray = talloc(vnn, struct ctdb_tcp_array);
3038 CTDB_NO_MEMORY(ctdb, tcparray);
3040 tcparray->num = list->num;
3042 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
3043 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3045 memcpy(tcparray->connections, &list->connections[0],
3046 sizeof(struct ctdb_connection)*tcparray->num);
3048 /* We now have a new fresh tickle list array for this vnn */
3049 vnn->tcp_array = tcparray;
3055 called to return the full list of tickles for the puclic address associated
3056 with the provided vnn
3058 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3060 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3061 struct ctdb_tickle_list_old *list;
3062 struct ctdb_tcp_array *tcparray;
3064 struct ctdb_vnn *vnn;
3066 vnn = find_public_ip_vnn(ctdb, addr);
3068 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3069 ctdb_addr_to_str(addr)));
3074 tcparray = vnn->tcp_array;
3076 num = tcparray->num;
3081 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
3082 + sizeof(struct ctdb_connection) * num;
3084 outdata->dptr = talloc_size(outdata, outdata->dsize);
3085 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3086 list = (struct ctdb_tickle_list_old *)outdata->dptr;
3091 memcpy(&list->connections[0], tcparray->connections,
3092 sizeof(struct ctdb_connection) * num);
3100 set the list of all tcp tickles for a public address
3102 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
3103 ctdb_sock_addr *addr,
3104 struct ctdb_tcp_array *tcparray)
3108 struct ctdb_tickle_list_old *list;
3111 num = tcparray->num;
3116 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
3117 sizeof(struct ctdb_connection) * num;
3118 data.dptr = talloc_size(ctdb, data.dsize);
3119 CTDB_NO_MEMORY(ctdb, data.dptr);
3121 list = (struct ctdb_tickle_list_old *)data.dptr;
3125 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
3128 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
3129 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3130 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3132 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3136 talloc_free(data.dptr);
3143 perform tickle updates if required
3145 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
3146 struct tevent_timer *te,
3147 struct timeval t, void *private_data)
3149 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3151 struct ctdb_vnn *vnn;
3153 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3154 /* we only send out updates for public addresses that
3157 if (ctdb->pnn != vnn->pnn) {
3160 /* We only send out the updates if we need to */
3161 if (!vnn->tcp_update_needed) {
3164 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
3165 &vnn->public_address,
3168 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3169 ctdb_addr_to_str(&vnn->public_address)));
3172 ("Sent tickle update for public address %s\n",
3173 ctdb_addr_to_str(&vnn->public_address)));
3174 vnn->tcp_update_needed = false;
3178 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3179 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3180 ctdb_update_tcp_tickles, ctdb);
3184 start periodic update of tcp tickles
3186 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3188 ctdb->tickle_update_context = talloc_new(ctdb);
3190 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3191 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3192 ctdb_update_tcp_tickles, ctdb);
3198 struct control_gratious_arp {
3199 struct ctdb_context *ctdb;
3200 ctdb_sock_addr addr;
3206 send a control_gratuitous arp
3208 static void send_gratious_arp(struct tevent_context *ev,
3209 struct tevent_timer *te,
3210 struct timeval t, void *private_data)
3213 struct control_gratious_arp *arp = talloc_get_type(private_data,
3214 struct control_gratious_arp);
3216 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3218 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3219 arp->iface, strerror(errno)));
3224 if (arp->count == CTDB_ARP_REPEAT) {
3229 tevent_add_timer(arp->ctdb->ev, arp,
3230 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3231 send_gratious_arp, arp);
3238 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3240 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
3241 struct control_gratious_arp *arp;
3243 /* verify the size of indata */
3244 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3245 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3246 (unsigned)indata.dsize,
3247 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
3251 ( offsetof(struct ctdb_addr_info_old, iface)
3252 + gratious_arp->len ) ){
3254 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3255 "but should be %u bytes\n",
3256 (unsigned)indata.dsize,
3257 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
3262 arp = talloc(ctdb, struct control_gratious_arp);
3263 CTDB_NO_MEMORY(ctdb, arp);
3266 arp->addr = gratious_arp->addr;
3267 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3268 CTDB_NO_MEMORY(ctdb, arp->iface);
3271 tevent_add_timer(arp->ctdb->ev, arp,
3272 timeval_zero(), send_gratious_arp, arp);
3277 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3279 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3282 /* verify the size of indata */
3283 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3284 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3288 ( offsetof(struct ctdb_addr_info_old, iface)
3291 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3292 "but should be %u bytes\n",
3293 (unsigned)indata.dsize,
3294 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3298 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
3300 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3303 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
  state carried from the delete-public-address control to the
  completion callback of the release-ip control it sends
 */
struct delete_ip_callback_state {
	/* the original control request, answered from the callback */
	struct ctdb_req_control_old *c;
};
3315 called when releaseip event finishes for del_public_address
3317 static void delete_ip_callback(struct ctdb_context *ctdb,
3318 int32_t status, TDB_DATA data,
3319 const char *errormsg,
3322 struct delete_ip_callback_state *state =
3323 talloc_get_type(private_data, struct delete_ip_callback_state);
3325 /* If release failed then fail. */
3326 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
3327 talloc_free(private_data);
3330 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
3331 struct ctdb_req_control_old *c,
3332 TDB_DATA indata, bool *async_reply)
3334 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3335 struct ctdb_vnn *vnn;
3337 /* verify the size of indata */
3338 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3339 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3343 ( offsetof(struct ctdb_addr_info_old, iface)
3346 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3347 "but should be %u bytes\n",
3348 (unsigned)indata.dsize,
3349 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3353 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
3355 /* walk over all public addresses until we find a match */
3356 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3357 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3358 if (vnn->pnn == ctdb->pnn) {
3359 struct delete_ip_callback_state *state;
3360 struct ctdb_public_ip *ip;
3364 vnn->delete_pending = true;
3366 state = talloc(ctdb,
3367 struct delete_ip_callback_state);
3368 CTDB_NO_MEMORY(ctdb, state);
3371 ip = talloc(state, struct ctdb_public_ip);
3374 (__location__ " Out of memory\n"));
3379 ip->addr = pub->addr;
3381 data.dsize = sizeof(struct ctdb_public_ip);
3382 data.dptr = (unsigned char *)ip;
3384 ret = ctdb_daemon_send_control(ctdb,
3387 CTDB_CONTROL_RELEASE_IP,
3394 (__location__ "Unable to send "
3395 "CTDB_CONTROL_RELEASE_IP\n"));
3400 state->c = talloc_steal(state, c);
3401 *async_reply = true;
3403 /* This IP is not hosted on the
3404 * current node so just delete it
3406 do_delete_ip(ctdb, vnn);
3413 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3414 ctdb_addr_to_str(&pub->addr)));
/*
  state carried from the ipreallocated control to the completion
  callback of the event script it runs
 */
struct ipreallocated_callback_state {
	/* the original control request, answered from the callback */
	struct ctdb_req_control_old *c;
};
3423 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3424 int status, void *p)
3426 struct ipreallocated_callback_state *state =
3427 talloc_get_type(p, struct ipreallocated_callback_state);
3431 (" \"ipreallocated\" event script failed (status %d)\n",
3433 if (status == -ETIME) {
3434 ctdb_ban_self(ctdb);
3438 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
3442 /* A control to run the ipreallocated event */
3443 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3444 struct ctdb_req_control_old *c,
3448 struct ipreallocated_callback_state *state;
3450 state = talloc(ctdb, struct ipreallocated_callback_state);
3451 CTDB_NO_MEMORY(ctdb, state);
3453 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3455 ret = ctdb_event_script_callback(ctdb, state,
3456 ctdb_ipreallocated_callback, state,
3457 CTDB_EVENT_IPREALLOCATED,
3461 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3466 /* tell the control that we will be reply asynchronously */
3467 state->c = talloc_steal(state, c);
3468 *async_reply = true;
3474 /* This function is called from the recovery daemon to verify that a remote
3475 node has the expected ip allocation.
3476 This is verified against ctdb->ip_tree
3478 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3479 struct ctdb_public_ip_list *ips,
3482 struct public_ip_list *tmp_ip;
3485 if (ctdb->ip_tree == NULL) {
3486 /* don't know the expected allocation yet, assume remote node
3495 for (i=0; i<ips->num; i++) {
3496 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3497 if (tmp_ip == NULL) {
3498 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3502 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3506 if (tmp_ip->pnn != ips->ip[i].pnn) {
3508 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3510 ctdb_addr_to_str(&ips->ip[i].addr),
3511 ips->ip[i].pnn, tmp_ip->pnn));
3519 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3521 struct public_ip_list *tmp_ip;
3523 /* IP tree is never built if DisableIPFailover is set */
3524 if (ctdb->tunable.disable_ip_failover != 0) {
3528 if (ctdb->ip_tree == NULL) {
3529 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3533 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3534 if (tmp_ip == NULL) {
3535 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3539 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3540 tmp_ip->pnn = ip->pnn;
3545 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3547 TALLOC_FREE(ctdb->ip_tree);
/*
  state of one running reloadips child process
 */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	struct ctdb_req_control_old *c;	/* request to answer when done, or NULL */
	int status;			/* status sent in that answer */
	int fd[2];			/* pipe: child writes its result, parent reads */
	pid_t child;
	struct tevent_fd *fde;		/* read event on fd[0] */
};
3559 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3561 if (h == h->ctdb->reload_ips) {
3562 h->ctdb->reload_ips = NULL;
3565 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3568 ctdb_kill(h->ctdb, h->child, SIGKILL);
3572 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3573 struct tevent_timer *te,
3574 struct timeval t, void *private_data)
3576 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3581 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3582 struct tevent_fd *fde,
3583 uint16_t flags, void *private_data)
3585 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3590 ret = sys_read(h->fd[0], &res, 1);
3591 if (ret < 1 || res != 0) {
3592 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3600 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3602 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3603 struct ctdb_public_ip_list_old *ips;
3604 struct ctdb_vnn *vnn;
3605 struct client_async_data *async_data;
3606 struct timeval timeout;
3608 struct ctdb_client_control_state *state;
3612 CTDB_NO_MEMORY(ctdb, mem_ctx);
3614 /* Read IPs from local node */
3615 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3616 CTDB_CURRENT_NODE, mem_ctx, &ips);
3619 ("Unable to fetch public IPs from local node\n"));
3620 talloc_free(mem_ctx);
3624 /* Read IPs file - this is safe since this is a child process */
3626 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3627 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3628 talloc_free(mem_ctx);
3632 async_data = talloc_zero(mem_ctx, struct client_async_data);
3633 CTDB_NO_MEMORY(ctdb, async_data);
3635 /* Compare IPs between node and file for IPs to be deleted */
3636 for (i = 0; i < ips->num; i++) {
3638 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3639 if (ctdb_same_ip(&vnn->public_address,
3640 &ips->ips[i].addr)) {
3641 /* IP is still in file */
3647 /* Delete IP ips->ips[i] */
3648 struct ctdb_addr_info_old *pub;
3651 ("IP %s no longer configured, deleting it\n",
3652 ctdb_addr_to_str(&ips->ips[i].addr)));
3654 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3655 CTDB_NO_MEMORY(ctdb, pub);
3657 pub->addr = ips->ips[i].addr;
3661 timeout = TAKEOVER_TIMEOUT();
3663 data.dsize = offsetof(struct ctdb_addr_info_old,
3665 data.dptr = (uint8_t *)pub;
3667 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3668 CTDB_CONTROL_DEL_PUBLIC_IP,
3669 0, data, async_data,
3671 if (state == NULL) {
3674 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3678 ctdb_client_async_add(async_data, state);
3682 /* Compare IPs between node and file for IPs to be added */
3684 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3685 for (i = 0; i < ips->num; i++) {
3686 if (ctdb_same_ip(&vnn->public_address,
3687 &ips->ips[i].addr)) {
3688 /* IP already on node */
3692 if (i == ips->num) {
3693 /* Add IP ips->ips[i] */
3694 struct ctdb_addr_info_old *pub;
3695 const char *ifaces = NULL;
3700 ("New IP %s configured, adding it\n",
3701 ctdb_addr_to_str(&vnn->public_address)));
3703 uint32_t pnn = ctdb_get_pnn(ctdb);
3705 data.dsize = sizeof(pnn);
3706 data.dptr = (uint8_t *)&pnn;
3708 ret = ctdb_client_send_message(
3710 CTDB_BROADCAST_CONNECTED,
3711 CTDB_SRVID_REBALANCE_NODE,
3714 DEBUG(DEBUG_WARNING,
3715 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3721 ifaces = vnn->ifaces[0];
3723 while (vnn->ifaces[iface] != NULL) {
3724 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3725 vnn->ifaces[iface]);
3729 len = strlen(ifaces) + 1;
3730 pub = talloc_zero_size(mem_ctx,
3731 offsetof(struct ctdb_addr_info_old, iface) + len);
3732 CTDB_NO_MEMORY(ctdb, pub);
3734 pub->addr = vnn->public_address;
3735 pub->mask = vnn->public_netmask_bits;
3737 memcpy(&pub->iface[0], ifaces, pub->len);
3739 timeout = TAKEOVER_TIMEOUT();
3741 data.dsize = offsetof(struct ctdb_addr_info_old,
3743 data.dptr = (uint8_t *)pub;
3745 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3746 CTDB_CONTROL_ADD_PUBLIC_IP,
3747 0, data, async_data,
3749 if (state == NULL) {
3752 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3756 ctdb_client_async_add(async_data, state);
3760 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3761 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3765 talloc_free(mem_ctx);
3769 talloc_free(mem_ctx);
3773 /* This control is sent to force the node to re-read the public addresses file
3774 and drop any addresses we should nnot longer host, and add new addresses
3775 that we are now able to host
3777 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3779 struct ctdb_reloadips_handle *h;
3780 pid_t parent = getpid();
3782 if (ctdb->reload_ips != NULL) {
3783 talloc_free(ctdb->reload_ips);
3784 ctdb->reload_ips = NULL;
3787 h = talloc(ctdb, struct ctdb_reloadips_handle);
3788 CTDB_NO_MEMORY(ctdb, h);
3793 if (pipe(h->fd) == -1) {
3794 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3799 h->child = ctdb_fork(ctdb);
3800 if (h->child == (pid_t)-1) {
3801 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3809 if (h->child == 0) {
3810 signed char res = 0;
3813 debug_extra = talloc_asprintf(NULL, "reloadips:");
3815 prctl_set_comment("ctdb_reloadips");
3816 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3817 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3820 res = ctdb_reloadips_child(ctdb);
3822 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3826 sys_write(h->fd[1], &res, 1);
3827 ctdb_wait_for_process_to_exit(parent);
3831 h->c = talloc_steal(h, c);
3834 set_close_on_exec(h->fd[0]);
3836 talloc_set_destructor(h, ctdb_reloadips_destructor);
3839 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3840 ctdb_reloadips_child_handler, (void *)h);
3841 tevent_fd_set_auto_close(h->fde);
3843 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3844 ctdb_reloadips_timeout_event, h);
3846 /* we reply later */
3847 *async_reply = true;