4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
44 #include "server/ipalloc.h"
/*
 * Timeout built from the takeover_timeout tunable; expands to an
 * expression that needs a variable named ctdb in scope.
 */
46 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/*
 * Used by ctdb_control_send_arp(): CTDB_ARP_INTERVAL is the first
 * argument of the timeval_current_ofs() call that re-arms the timer,
 * CTDB_ARP_REPEAT is the value arp->count is compared against.
 */
48 #define CTDB_ARP_INTERVAL 1
49 #define CTDB_ARP_REPEAT 3
/*
 * One network interface known to the daemon.  Entries are linked into
 * ctdb->ifaces through prev/next (see DLIST_ADD in
 * ctdb_add_local_iface()).  Other code in this file also reads the
 * members name, references and link_up.
 */
51 struct ctdb_interface {
52 struct ctdb_interface *prev, *next;
/*
 * Return the name of the interface currently assigned to a VNN
 * (vnn->iface->name).  Used throughout this file for log messages and
 * as an event script argument.
 */
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
61 return vnn->iface->name;
/*
 * Register an interface name with the daemon.
 *
 * ctdb->ifaces is scanned for an entry with the same name; a new,
 * zero-initialised struct ctdb_interface is allocated as a talloc child
 * of ctdb, the name is duplicated as a child of the new entry, and the
 * entry is linked onto ctdb->ifaces.
 *
 * NOTE(review): presumably an existing entry short-circuits the
 * allocation - confirm against the full function body.
 */
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
69 struct ctdb_interface *i;
71 /* Verify that we don't have an entry for this interface yet */
72 for (i=ctdb->ifaces;i;i=i->next) {
73 if (strcmp(i->name, iface) == 0) {
78 /* create a new structure for this interface */
79 i = talloc_zero(ctdb, struct ctdb_interface);
80 CTDB_NO_MEMORY_FATAL(ctdb, i);
81 i->name = talloc_strdup(i, iface);
82 CTDB_NO_MEMORY(ctdb, i->name);
86 DLIST_ADD(ctdb->ifaces, i);
/*
 * Scan the NULL-terminated vnn->ifaces array for an entry that compares
 * equal (strcmp) to name.
 */
91 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
96 for (n = 0; vnn->ifaces[n] != NULL; n++) {
97 if (strcmp(name, vnn->ifaces[n]) == 0) {
105 /* If any interfaces now have no possible IPs then delete them. This
106 * implementation is naive (i.e. simple) rather than clever
107 * (i.e. complex). Given that this is run on delip and that operation
108 * is rare, this doesn't need to be efficient - it needs to be
109 * foolproof. One alternative is reference counting, where the logic
110 * is distributed and can, therefore, be broken in multiple places.
111 * Another alternative is to build a red-black tree of interfaces that
112 * can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
113 * once) and then walking ctdb->ifaces once and deleting those not in
114 * the tree. Let's go to one of those if the naive implementation
115 * causes problems... :-)
/*
 * Remove from ctdb->ifaces those interfaces that no VNN refers to any
 * more.
 *
 * Only interfaces named in the given vnn are candidates.  A candidate
 * is left alone when it is the first interface of ctdb->single_ip_vnn
 * or when some VNN on ctdb->vnn lists its name; otherwise it is
 * unlinked from ctdb->ifaces.  The successor is held in "next" so the
 * walk survives removal of the current entry.
 */
117 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
118 struct ctdb_vnn *vnn)
120 struct ctdb_interface *i, *next;
122 /* For each interface, check if there's an IP using it. */
123 for (i = ctdb->ifaces; i != NULL; i = next) {
128 /* Only consider interfaces named in the given VNN. */
129 if (!vnn_has_interface_with_name(vnn, i->name)) {
133 /* Is the "single IP" on this interface? */
134 if ((ctdb->single_ip_vnn != NULL) &&
135 (ctdb->single_ip_vnn->ifaces[0] != NULL) &&
136 (strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
137 /* Found, next interface please... */
140 /* Search for a vnn with this interface. */
142 for (tv=ctdb->vnn; tv; tv=tv->next) {
143 if (vnn_has_interface_with_name(tv, i->name)) {
150 /* None of the VNNs are using this interface. */
151 DLIST_REMOVE(ctdb->ifaces, i);
/*
 * Look up an interface on ctdb->ifaces by name (exact strcmp match).
 */
158 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
161 struct ctdb_interface *i;
163 for (i=ctdb->ifaces;i;i=i->next) {
164 if (strcmp(i->name, iface) == 0) {
/*
 * Pick an interface for vnn from the names configured in vnn->ifaces.
 *
 * Each name is resolved with ctdb_find_iface() and the candidate's
 * references count is compared with that of the best one so far.
 * NOTE(review): presumably the candidate with fewer references wins and
 * unusable candidates are skipped - confirm against the full body.
 */
172 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
176 struct ctdb_interface *cur = NULL;
177 struct ctdb_interface *best = NULL;
179 for (i=0; vnn->ifaces[i]; i++) {
181 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
195 if (cur->references < best->references) {
/*
 * Assign an interface to vnn and mark this node as its owner.
 *
 * The visible paths are: a log message when the address is still
 * assigned to an interface, a call to ctdb_vnn_best_iface() to choose
 * one, an error log when no interface can be chosen, and finally
 * vnn->pnn = ctdb->pnn followed by a log of the new assignment.
 */
204 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
205 struct ctdb_vnn *vnn)
207 struct ctdb_interface *best = NULL;
210 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
211 "still assigned to iface '%s'\n",
212 ctdb_addr_to_str(&vnn->public_address),
213 ctdb_vnn_iface_string(vnn)));
217 best = ctdb_vnn_best_iface(ctdb, vnn);
219 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
220 "cannot assign to iface any iface\n",
221 ctdb_addr_to_str(&vnn->public_address)));
227 vnn->pnn = ctdb->pnn;
229 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
230 "now assigned to iface '%s' refs[%d]\n",
231 ctdb_addr_to_str(&vnn->public_address),
232 ctdb_vnn_iface_string(vnn),
/*
 * Detach vnn from its current interface.
 *
 * Logs the old interface and its reference count (0 when no interface
 * is set), decrements vnn->iface->references, and then tests whether
 * this node is recorded as the owner (vnn->pnn == ctdb->pnn).
 */
237 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
238 struct ctdb_vnn *vnn)
240 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241 "now unassigned (old iface '%s' refs[%d])\n",
242 ctdb_addr_to_str(&vnn->public_address),
243 ctdb_vnn_iface_string(vnn),
244 vnn->iface?vnn->iface->references:0));
246 vnn->iface->references--;
249 if (vnn->pnn == ctdb->pnn) {
/*
 * Decide whether this node can currently host vnn.
 *
 * Checks, in order: the daemon run state is CTDB_RUNSTATE_RUNNING, the
 * VNN is not marked delete_pending, the assigned interface (if any) has
 * link_up set, and then each configured interface name resolved through
 * ctdb_find_iface().
 */
254 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
255 struct ctdb_vnn *vnn)
259 /* Nodes that are not RUNNING can not host IPs */
260 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
264 if (vnn->delete_pending) {
268 if (vnn->iface && vnn->iface->link_up) {
272 for (i=0; vnn->ifaces[i]; i++) {
273 struct ctdb_interface *cur;
275 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/*
 * State for the repeated ARP / tickle-ACK announcements driven by
 * ctdb_control_send_arp().  tcparray holds the connections to tickle
 * and vnn is the public address being announced; the handler also uses
 * the members addr and count.
 */
288 struct ctdb_takeover_arp {
289 struct ctdb_context *ctdb;
292 struct ctdb_tcp_array *tcparray;
293 struct ctdb_vnn *vnn;
298 lists of tcp endpoints
/* Doubly-linked list node wrapping one struct ctdb_connection. */
300 struct ctdb_tcp_list {
301 struct ctdb_tcp_list *prev, *next;
302 struct ctdb_connection connection;
306 list of clients to kill on IP release
/*
 * Doubly-linked list node for a client registered against an IP;
 * ctdb->client_ip_list is walked by release_kill_clients(), which reads
 * the addr member of each entry.
 */
308 struct ctdb_client_ip {
309 struct ctdb_client_ip *prev, *next;
310 struct ctdb_context *ctdb;
317 send a gratuitous arp
/*
 * Timer handler that announces a taken-over address.
 *
 * private_data is a struct ctdb_takeover_arp.  The handler sends an ARP
 * for arp->addr on the VNN's interface with ctdb_sys_send_arp(), then
 * sends a TCP tickle ACK with ctdb_sys_send_tcp() for every connection
 * in arp->tcparray; failures of either are logged at DEBUG_CRIT.
 * arp->count is compared with CTDB_ARP_REPEAT, and the handler re-arms
 * itself on arp->vnn->takeover_ctx, so freeing that context cancels the
 * pending timer.
 */
319 static void ctdb_control_send_arp(struct tevent_context *ev,
320 struct tevent_timer *te,
321 struct timeval t, void *private_data)
323 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
324 struct ctdb_takeover_arp);
326 struct ctdb_tcp_array *tcparray;
327 const char *iface = ctdb_vnn_iface_string(arp->vnn);
329 ret = ctdb_sys_send_arp(&arp->addr, iface);
331 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
332 iface, strerror(errno)));
335 tcparray = arp->tcparray;
337 for (i=0;i<tcparray->num;i++) {
338 struct ctdb_connection *tcon;
340 tcon = &tcparray->connections[i];
341 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
342 (unsigned)ntohs(tcon->dst.ip.sin_port),
343 ctdb_addr_to_str(&tcon->src),
344 (unsigned)ntohs(tcon->src.ip.sin_port)));
345 ret = ctdb_sys_send_tcp(
350 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
351 ctdb_addr_to_str(&tcon->src)));
358 if (arp->count == CTDB_ARP_REPEAT) {
363 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
364 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
365 ctdb_control_send_arp, arp);
/*
 * Start announcing vnn's public address on its interface.
 *
 * Creates vnn->takeover_ctx on first use, allocates a zeroed
 * struct ctdb_takeover_arp under it, copies the public address into it
 * and moves vnn->tcp_array into the new state with talloc_steal().
 * vnn->tcp_array is then cleared and tcp_update_needed set.  Finally
 * ctdb_control_send_arp() is scheduled with a zero timeout.
 */
368 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
369 struct ctdb_vnn *vnn)
371 struct ctdb_takeover_arp *arp;
372 struct ctdb_tcp_array *tcparray;
374 if (!vnn->takeover_ctx) {
375 vnn->takeover_ctx = talloc_new(vnn);
376 if (!vnn->takeover_ctx) {
381 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
387 arp->addr = vnn->public_address;
390 tcparray = vnn->tcp_array;
392 /* add all of the known tcp connections for this IP to the
393 list of tcp connections to send tickle acks for */
394 arp->tcparray = talloc_steal(arp, tcparray);
396 vnn->tcp_array = NULL;
397 vnn->tcp_update_needed = true;
400 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
401 timeval_zero(), ctdb_control_send_arp, arp);
/*
 * State handed to release_ip_callback(): the control request to answer
 * (c), the address being released (addr) and its VNN (vnn).
 */
406 struct takeover_callback_state {
407 struct ctdb_req_control_old *c;
408 ctdb_sock_addr *addr;
409 struct ctdb_vnn *vnn;
/*
 * State handed to ctdb_do_takeip_callback(): the control request to
 * answer (c) and the VNN being taken over (vnn).
 */
412 struct ctdb_do_takeip_state {
413 struct ctdb_req_control_old *c;
414 struct ctdb_vnn *vnn;
418 called when takeip event finishes
/*
 * Completion callback for the event script started by ctdb_do_takeip().
 *
 * private_data is a struct ctdb_do_takeip_state.  On the failure path
 * the error is logged, the control is answered with the script status
 * and this node is flagged NODE_FLAGS_UNHEALTHY.  Otherwise, when
 * do_checkpublicip is set, the address is announced through
 * ctdb_announce_vnn_iface() (the control is answered with -1 if that
 * fails); a CTDB_SRVID_TAKE_IP message carrying the address string is
 * sent to this node and the control is answered with 0.
 */
420 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
423 struct ctdb_do_takeip_state *state =
424 talloc_get_type(private_data, struct ctdb_do_takeip_state);
429 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
431 if (status == -ETIME) {
434 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
435 ctdb_addr_to_str(&state->vnn->public_address),
436 ctdb_vnn_iface_string(state->vnn)));
437 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
439 node->flags |= NODE_FLAGS_UNHEALTHY;
444 if (ctdb->do_checkpublicip) {
446 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
448 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
455 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
456 data.dsize = strlen((char *)data.dptr) + 1;
457 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
459 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
462 /* the control succeeded */
463 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct ctdb_do_takeip_state (installed in
 * ctdb_do_takeip()): clears the VNN's update_in_flight flag.
 */
468 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
470 state->vnn->update_in_flight = false;
475 take over an ip address
/*
 * Begin taking over vnn's public address on this node.
 *
 * The request is rejected (with a log message) while another update for
 * the same VNN is in flight.  An interface is assigned with
 * ctdb_vnn_assign_iface(); a ctdb_do_takeip_state is allocated as a
 * talloc child of vnn and the control request c is re-parented onto
 * ctdb.  vnn->update_in_flight is set and ctdb_takeip_destructor is
 * installed so the flag is cleared when the state is freed.  The event
 * script is then started with ctdb_do_takeip_callback as completion
 * callback and the interface name, address string and netmask bits as
 * arguments; a failure to start it is logged.
 */
477 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
478 struct ctdb_req_control_old *c,
479 struct ctdb_vnn *vnn)
482 struct ctdb_do_takeip_state *state;
484 if (vnn->update_in_flight) {
485 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
486 "update for this IP already in flight\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits));
492 ret = ctdb_vnn_assign_iface(ctdb, vnn);
494 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
495 "assign a usable interface\n",
496 ctdb_addr_to_str(&vnn->public_address),
497 vnn->public_netmask_bits));
501 state = talloc(vnn, struct ctdb_do_takeip_state);
502 CTDB_NO_MEMORY(ctdb, state);
504 state->c = talloc_steal(ctdb, c);
507 vnn->update_in_flight = true;
508 talloc_set_destructor(state, ctdb_takeip_destructor);
510 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
511 ctdb_addr_to_str(&vnn->public_address),
512 vnn->public_netmask_bits,
513 ctdb_vnn_iface_string(vnn)));
515 ret = ctdb_event_script_callback(ctdb,
517 ctdb_do_takeip_callback,
521 ctdb_vnn_iface_string(vnn),
522 ctdb_addr_to_str(&vnn->public_address),
523 vnn->public_netmask_bits);
526 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
527 ctdb_addr_to_str(&vnn->public_address),
528 ctdb_vnn_iface_string(vnn)));
/*
 * State handed to ctdb_do_updateip_callback(): the control request to
 * answer (c), the interface the address was on before the move (old,
 * restored if the move fails) and the VNN being moved (vnn).
 */
536 struct ctdb_do_updateip_state {
537 struct ctdb_req_control_old *c;
538 struct ctdb_interface *old;
539 struct ctdb_vnn *vnn;
543 called when updateip event finishes
/*
 * Completion callback for the event script started by
 * ctdb_do_updateip().
 *
 * private_data is a struct ctdb_do_updateip_state.  On the failure path
 * the error is logged, the newly assigned interface is dropped with
 * ctdb_vnn_unassign_iface(), state->old is put back as vnn->iface with
 * its reference count incremented, and the control is answered with the
 * script status.  Otherwise, when do_checkpublicip is set, the address
 * is announced (the control is answered with -1 if that fails) and the
 * control is answered with 0.
 */
545 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
548 struct ctdb_do_updateip_state *state =
549 talloc_get_type(private_data, struct ctdb_do_updateip_state);
553 if (status == -ETIME) {
556 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
557 ctdb_addr_to_str(&state->vnn->public_address),
559 ctdb_vnn_iface_string(state->vnn)));
562 * All we can do is reset the old interface
563 * and let the next run fix it
565 ctdb_vnn_unassign_iface(ctdb, state->vnn);
566 state->vnn->iface = state->old;
567 state->vnn->iface->references++;
569 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
574 if (ctdb->do_checkpublicip) {
576 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
578 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
585 /* the control succeeded */
586 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct ctdb_do_updateip_state (installed in
 * ctdb_do_updateip()): clears the VNN's update_in_flight flag.
 */
591 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
593 state->vnn->update_in_flight = false;
598 update (move) an ip address
/*
 * Move vnn's public address from its current interface to another one.
 *
 * The current interface is remembered in "old".  The request is
 * rejected while another update for the VNN is in flight.  The VNN is
 * unassigned and re-assigned; a failure to assign is logged.  If the
 * newly chosen interface has the same name as the old one the control
 * is answered with 0 straight away and no event script is run.
 * Otherwise a ctdb_do_updateip_state is allocated under vnn, the control
 * request is re-parented onto ctdb, update_in_flight is set (cleared by
 * ctdb_updateip_destructor) and the CTDB_EVENT_UPDATE_IP event script
 * is started with ctdb_do_updateip_callback as completion callback.
 */
600 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
601 struct ctdb_req_control_old *c,
602 struct ctdb_vnn *vnn)
605 struct ctdb_do_updateip_state *state;
606 struct ctdb_interface *old = vnn->iface;
607 const char *new_name;
609 if (vnn->update_in_flight) {
610 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
611 "update for this IP already in flight\n",
612 ctdb_addr_to_str(&vnn->public_address),
613 vnn->public_netmask_bits));
617 ctdb_vnn_unassign_iface(ctdb, vnn);
618 ret = ctdb_vnn_assign_iface(ctdb, vnn);
620 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
621 "assin a usable interface (old iface '%s')\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 vnn->public_netmask_bits,
628 new_name = ctdb_vnn_iface_string(vnn);
629 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
630 /* A benign update from one interface onto itself.
631 * no need to run the eventscripts in this case, just return
634 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
638 state = talloc(vnn, struct ctdb_do_updateip_state);
639 CTDB_NO_MEMORY(ctdb, state);
641 state->c = talloc_steal(ctdb, c);
645 vnn->update_in_flight = true;
646 talloc_set_destructor(state, ctdb_updateip_destructor);
648 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
649 "interface %s to %s\n",
650 ctdb_addr_to_str(&vnn->public_address),
651 vnn->public_netmask_bits,
655 ret = ctdb_event_script_callback(ctdb,
657 ctdb_do_updateip_callback,
659 CTDB_EVENT_UPDATE_IP,
663 ctdb_addr_to_str(&vnn->public_address),
664 vnn->public_netmask_bits);
666 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
667 ctdb_addr_to_str(&vnn->public_address),
668 old->name, new_name));
677 Find the vnn of the node that has a public ip address
678 returns NULL if the address is not known as a public address
/*
 * Search ctdb->vnn for the entry whose public_address matches addr,
 * compared with ctdb_same_ip().
 */
680 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
682 struct ctdb_vnn *vnn;
684 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
685 if (ctdb_same_ip(&vnn->public_address, addr)) {
694 take over an ip address
/*
 * Handler for the control asking this node to take over a public IP.
 *
 * indata.dptr is interpreted as a struct ctdb_public_ip.  The request
 * must name this node (pip->pnn == ctdb->pnn) and a known public
 * address.  When IP failover is enabled and do_checkpublicip is set,
 * the kernel is asked whether the address is already configured
 * (ctdb_sys_have_ip) and the best interface is looked up.  The
 * combinations of vnn->iface, vnn->pnn and have_ip are then reconciled
 * case by case, each with its own log message below, and the request
 * is dispatched to ctdb_do_takeip() or ctdb_do_updateip().  A takeover
 * of an address that is already held is only logged as redundant.
 */
696 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
697 struct ctdb_req_control_old *c,
702 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
703 struct ctdb_vnn *vnn;
704 bool have_ip = false;
705 bool do_updateip = false;
706 bool do_takeip = false;
707 struct ctdb_interface *best_iface = NULL;
709 if (pip->pnn != ctdb->pnn) {
710 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
711 "with pnn %d, but we're node %d\n",
712 ctdb_addr_to_str(&pip->addr),
713 pip->pnn, ctdb->pnn));
717 /* update our vnn list */
718 vnn = find_public_ip_vnn(ctdb, &pip->addr);
720 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
721 ctdb_addr_to_str(&pip->addr)));
725 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
726 have_ip = ctdb_sys_have_ip(&pip->addr);
728 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
729 if (best_iface == NULL) {
730 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
731 "a usable interface (old %s, have_ip %d)\n",
732 ctdb_addr_to_str(&vnn->public_address),
733 vnn->public_netmask_bits,
734 ctdb_vnn_iface_string(vnn),
739 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
740 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
745 if (vnn->iface == NULL && have_ip) {
746 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
747 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
748 ctdb_addr_to_str(&vnn->public_address)));
752 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
753 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
754 "and we have it on iface[%s], but it was assigned to node %d"
755 "and we are node %d, banning ourself\n",
756 ctdb_addr_to_str(&vnn->public_address),
757 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
762 if (vnn->pnn == -1 && have_ip) {
763 vnn->pnn = ctdb->pnn;
764 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
765 "and we already have it on iface[%s], update local daemon\n",
766 ctdb_addr_to_str(&vnn->public_address),
767 ctdb_vnn_iface_string(vnn)));
772 if (vnn->iface != best_iface) {
773 if (!vnn->iface->link_up) {
775 } else if (vnn->iface->references > (best_iface->references + 1)) {
776 /* only move when the rebalance gains something */
784 ctdb_vnn_unassign_iface(ctdb, vnn);
791 ret = ctdb_do_takeip(ctdb, c, vnn);
795 } else if (do_updateip) {
796 ret = ctdb_do_updateip(ctdb, c, vnn);
802 * The interface is up and the kernel known the ip
805 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn)));
812 /* tell ctdb_control.c that we will be replying asynchronously */
819 kill any clients that are registered with an IP that is being released
/*
 * Kill local clients that registered against an address being released.
 *
 * Walks ctdb->client_ip_list; for each entry whose address matches addr
 * (ctdb_same_ip) the client is looked up through reqid_find() and, if it
 * has a non-zero pid, that process is sent SIGKILL.  Every step is
 * logged at DEBUG_INFO.
 */
821 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
823 struct ctdb_client_ip *ip;
825 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
826 ctdb_addr_to_str(addr)));
828 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
829 ctdb_sock_addr tmp_addr;
832 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
834 ctdb_addr_to_str(&ip->addr)));
836 if (ctdb_same_ip(&tmp_addr, addr)) {
837 struct ctdb_client *client = reqid_find(ctdb->idr,
840 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
842 ctdb_addr_to_str(&ip->addr),
845 if (client->pid != 0) {
846 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
847 (unsigned)client->pid,
848 ctdb_addr_to_str(addr),
850 kill(client->pid, SIGKILL);
/*
 * Remove a VNN from the daemon: unlink it from ctdb->vnn, detach it from
 * its interface, then drop any interfaces that no remaining VNN names.
 * The VNN is unlinked first so that ctdb_remove_orphaned_ifaces() does
 * not count it as a user of its own interfaces.
 */
856 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
858 DLIST_REMOVE(ctdb->vnn, vnn);
859 ctdb_vnn_unassign_iface(ctdb, vnn);
860 ctdb_remove_orphaned_ifaces(ctdb, vnn);
865 called when releaseip event finishes
/*
 * Completion callback for the release-IP event script started by
 * ctdb_control_release_ip().
 *
 * private_data is a struct takeover_callback_state.  When IP failover
 * is enabled and do_checkpublicip is set, the release is treated as
 * failed if the kernel still has the address.  Otherwise a
 * CTDB_SRVID_RELEASE_IP message carrying the address string is sent to
 * this node, registered clients are killed, the VNN is detached from
 * its interface, a VNN marked delete_pending is deleted, and the
 * control is answered with 0.
 */
867 static void release_ip_callback(struct ctdb_context *ctdb, int status,
870 struct takeover_callback_state *state =
871 talloc_get_type(private_data, struct takeover_callback_state);
874 if (status == -ETIME) {
878 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
879 if (ctdb_sys_have_ip(state->addr)) {
881 ("IP %s still hosted during release IP callback, failing\n",
882 ctdb_addr_to_str(state->addr)));
883 ctdb_request_control_reply(ctdb, state->c,
890 /* send a message to all clients of this node telling them
891 that the cluster has been reconfigured and they should
892 release any sockets on this IP */
893 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
894 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
895 data.dsize = strlen((char *)data.dptr)+1;
897 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
899 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
901 /* kill clients that have registered with this IP */
902 release_kill_clients(ctdb, state->addr);
904 ctdb_vnn_unassign_iface(ctdb, state->vnn);
906 /* Process the IP if it has been marked for deletion */
907 if (state->vnn->delete_pending) {
908 do_delete_ip(ctdb, state->vnn);
912 /* the control succeeded */
913 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/*
 * talloc destructor for struct takeover_callback_state (installed in
 * ctdb_control_release_ip()): clears update_in_flight on the VNN, if
 * the state still points at one.
 */
917 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
919 if (state->vnn != NULL) {
920 state->vnn->update_in_flight = false;
926 release an ip address
/*
 * Handler for the control asking this node to release a public IP.
 *
 * indata.dptr is interpreted as a struct ctdb_public_ip.  After looking
 * up the VNN, any pending ARP announcements are cancelled by freeing
 * vnn->takeover_ctx.  Releases of addresses this node does not hold are
 * logged as redundant, and a release is rejected while another update
 * for the VNN is in flight.  Otherwise a takeover_callback_state is
 * allocated under ctdb, takes ownership of the control request and a
 * copy of the address, update_in_flight is set (cleared by
 * ctdb_releaseip_destructor) and the CTDB_EVENT_RELEASE_IP event script
 * is started with release_ip_callback as completion callback.
 * *async_reply is set so the caller knows the reply comes later.
 *
 * NOTE(review): the interface name is copied with strdup(); the lines
 * shown here do not include a matching free() - confirm it is released
 * on every exit path.
 */
928 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
929 struct ctdb_req_control_old *c,
934 struct takeover_callback_state *state;
935 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
936 struct ctdb_vnn *vnn;
939 /* update our vnn list */
940 vnn = find_public_ip_vnn(ctdb, &pip->addr);
942 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
943 ctdb_addr_to_str(&pip->addr)));
948 /* stop any previous arps */
949 talloc_free(vnn->takeover_ctx);
950 vnn->takeover_ctx = NULL;
952 /* Some ctdb tool commands (e.g. moveip, rebalanceip) send
953 * lazy multicast to drop an IP from any node that isn't the
954 * intended new node. The following causes makes ctdbd ignore
955 * a release for any address it doesn't host.
957 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
958 if (!ctdb_sys_have_ip(&pip->addr)) {
959 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
960 ctdb_addr_to_str(&pip->addr),
961 vnn->public_netmask_bits,
962 ctdb_vnn_iface_string(vnn)));
963 ctdb_vnn_unassign_iface(ctdb, vnn);
967 if (vnn->iface == NULL) {
968 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
969 ctdb_addr_to_str(&pip->addr),
970 vnn->public_netmask_bits));
975 /* There is a potential race between take_ip and us because we
976 * update the VNN via a callback that run when the
977 * eventscripts have been run. Avoid the race by allowing one
978 * update to be in flight at a time.
980 if (vnn->update_in_flight) {
981 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
982 "update for this IP already in flight\n",
983 ctdb_addr_to_str(&vnn->public_address),
984 vnn->public_netmask_bits));
988 iface = strdup(ctdb_vnn_iface_string(vnn));
990 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
991 ctdb_addr_to_str(&pip->addr),
992 vnn->public_netmask_bits,
996 state = talloc(ctdb, struct takeover_callback_state);
998 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1004 state->c = talloc_steal(state, c);
1005 state->addr = talloc(state, ctdb_sock_addr);
1006 if (state->addr == NULL) {
1007 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1008 __FILE__, __LINE__);
1013 *state->addr = pip->addr;
1016 vnn->update_in_flight = true;
1017 talloc_set_destructor(state, ctdb_releaseip_destructor);
1019 ret = ctdb_event_script_callback(ctdb,
1020 state, release_ip_callback, state,
1021 CTDB_EVENT_RELEASE_IP,
1024 ctdb_addr_to_str(&pip->addr),
1025 vnn->public_netmask_bits);
1028 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1029 ctdb_addr_to_str(&pip->addr),
1030 ctdb_vnn_iface_string(vnn)));
1035 /* tell the control that we will reply asynchronously */
1036 *async_reply = true;
/*
 * Add one public address, with its comma-separated list of candidate
 * interfaces, to the daemon's VNN list.
 *
 * Every interface named in ifaces must exist
 * (ctdb_sys_check_iface_exists) and the address must not already be on
 * ctdb->vnn.  A zeroed struct ctdb_vnn is allocated under ctdb, its
 * NULL-terminated ifaces array is filled from the tokenised list, and
 * address and netmask bits are stored.  With check_address set, an
 * address the kernel already has is recorded as owned by this node.
 * Each interface is registered through ctdb_add_local_iface() and the
 * VNN is linked onto ctdb->vnn.
 *
 * NOTE(review): the interface list is tokenised with strtok(), which
 * keeps static state, and the first copy is made with strdup(); the
 * lines shown here do not include a matching free() - confirm.
 */
1040 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1041 ctdb_sock_addr *addr,
1042 unsigned mask, const char *ifaces,
1045 struct ctdb_vnn *vnn;
1052 tmp = strdup(ifaces);
1053 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1054 if (!ctdb_sys_check_iface_exists(iface)) {
1055 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1062 /* Verify that we don't have an entry for this ip yet */
1063 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1064 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1065 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
1066 ctdb_addr_to_str(addr)));
1071 /* create a new vnn structure for this ip address */
1072 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1073 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1074 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1075 tmp = talloc_strdup(vnn, ifaces);
1076 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1077 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1078 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1079 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1080 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1081 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1085 vnn->ifaces[num] = NULL;
1086 vnn->public_address = *addr;
1087 vnn->public_netmask_bits = mask;
1089 if (check_address) {
1090 if (ctdb_sys_have_ip(addr)) {
1091 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1092 vnn->pnn = ctdb->pnn;
1096 for (i=0; vnn->ifaces[i]; i++) {
1097 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1099 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1100 "for public_address[%s]\n",
1101 vnn->ifaces[i], ctdb_addr_to_str(addr)));
1107 DLIST_ADD(ctdb->vnn, vnn);
1113 setup the public address lists from a file
/*
 * Load the public address list from ctdb->public_addresses_file.
 *
 * Trailing empty lines are dropped from the count.  For each remaining
 * line, leading spaces and tabs are skipped and the line is split on
 * spaces/tabs: the first token is the address string, the second the
 * interface list.  When no interface is given,
 * ctdb->default_public_interface is used, and it is an error for that
 * to be unset.  The address is parsed with parse_ip_mask() and added
 * through ctdb_add_public_address(), passing check_addresses on.
 * Problems are logged with the 1-based line number.
 */
1115 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1121 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1122 if (lines == NULL) {
1123 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1126 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1130 for (i=0;i<nlines;i++) {
1132 ctdb_sock_addr addr;
1133 const char *addrstr;
1138 while ((*line == ' ') || (*line == '\t')) {
1144 if (strcmp(line, "") == 0) {
1147 tok = strtok(line, " \t");
1149 tok = strtok(NULL, " \t");
1151 if (NULL == ctdb->default_public_interface) {
1152 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1157 ifaces = ctdb->default_public_interface;
1162 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1163 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1167 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1168 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
 * Configure the daemon's single public IP (the --single-public-ip
 * option named in the log message below).
 *
 * Builds a struct ctdb_vnn under ctdb with a one-element,
 * NULL-terminated ifaces array holding iface, parses ip into its
 * public_address, registers the interface with ctdb_add_local_iface(),
 * marks that interface link_up, assigns it to the VNN and finally
 * stores the VNN in ctdb->single_ip_vnn.
 */
1179 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1183 struct ctdb_vnn *svnn;
1184 struct ctdb_interface *cur = NULL;
1188 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1189 CTDB_NO_MEMORY(ctdb, svnn);
1191 svnn->ifaces = talloc_array(svnn, const char *, 2);
1192 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1193 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1194 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1195 svnn->ifaces[1] = NULL;
1197 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1203 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1205 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1206 "for single_ip[%s]\n",
1208 ctdb_addr_to_str(&svnn->public_address)));
1213 /* assume the single public ip interface is initially "good" */
1214 cur = ctdb_find_iface(ctdb, iface);
1216 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1219 cur->link_up = true;
1221 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1227 ctdb->single_ip_vnn = svnn;
/*
 * Merge callback for two public_ip_list entries: parm is the new entry,
 * data the previous one (NULL when there is none).  A new entry whose
 * pnn is -1 takes over the previous entry's pnn.
 *
 * NOTE(review): presumably this is the callback handed to
 * trbt_insertarray32_callback() in create_merged_ip_list() - confirm.
 */
1231 static void *add_ip_callback(void *parm, void *data)
1233 struct public_ip_list *this_ip = parm;
1234 struct public_ip_list *prev_ip = data;
1236 if (prev_ip == NULL) {
1239 if (this_ip->pnn == -1) {
1240 this_ip->pnn = prev_ip->pnn;
/*
 * Tree traversal callback (see trbt_traversearray32() in
 * create_merged_ip_list()): param points at a list head and data is the
 * visited entry, whose next pointer is set to the current head.
 */
1246 static int getips_count_callback(void *param, void *data)
1248 struct public_ip_list **ip_list = (struct public_ip_list **)param;
1249 struct public_ip_list *new_ip = (struct public_ip_list *)data;
1251 new_ip->next = *ip_list;
/*
 * Forward declaration; called from ctdb_reload_remote_public_ips()
 * before the definition appears.
 */
1256 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1257 struct ctdb_public_ip_list_old *ips,
/*
 * Fetch the public IP lists of every active node into ipalloc_state.
 *
 * ipalloc_state->num must equal nodemap->num.  Nodes flagged
 * NODE_FLAGS_INACTIVE are skipped.  For each remaining node j the known
 * public IPs are fetched into known_public_ips[j] and, when
 * do_checkpublicip is set, checked with verify_remote_ip_allocation();
 * then the available IPs (CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) are
 * fetched into available_public_ips[j].  Fetch failures are logged with
 * the node number.
 */
1260 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1261 struct ipalloc_state *ipalloc_state,
1262 struct ctdb_node_map_old *nodemap)
1267 if (ipalloc_state->num != nodemap->num) {
1270 " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1271 ipalloc_state->num, nodemap->num));
1275 for (j=0; j<nodemap->num; j++) {
1276 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1280 /* Retrieve the list of known public IPs from the node */
1281 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1284 ipalloc_state->known_public_ips,
1286 &ipalloc_state->known_public_ips[j]);
1289 ("Failed to read known public IPs from node: %u\n",
1294 if (ctdb->do_checkpublicip) {
1295 verify_remote_ip_allocation(ctdb,
1296 ipalloc_state->known_public_ips[j],
1300 /* Retrieve the list of available public IPs from the node */
1301 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1304 ipalloc_state->available_public_ips,
1305 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1306 &ipalloc_state->available_public_ips[j]);
1309 ("Failed to read available public IPs from node: %u\n",
/*
 * Build one list of public IPs from the per-node known_public_ips
 * arrays in ipalloc_state.
 *
 * ctdb->ip_tree is freed and recreated.  Nodes flagged
 * NODE_FLAGS_DELETED and nodes without a public IP list are skipped.
 * For every address reported by a node a public_ip_list entry is
 * allocated under the tree; the reported pnn is only taken when the
 * reporting node claims to host the address itself.  Entries are
 * inserted into the tree keyed by ip_key(), and the tree is finally
 * traversed with getips_count_callback to produce the list.
 */
1318 static struct public_ip_list *
1319 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1322 struct public_ip_list *ip_list;
1323 struct ctdb_public_ip_list_old *public_ips;
1325 TALLOC_FREE(ctdb->ip_tree);
1326 ctdb->ip_tree = trbt_create(ctdb, 0);
1328 for (i=0; i < ctdb->num_nodes; i++) {
1329 public_ips = ipalloc_state->known_public_ips[i];
1331 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1335 /* there were no public ips for this node */
1336 if (public_ips == NULL) {
1340 for (j=0; j < public_ips->num; j++) {
1341 struct public_ip_list *tmp_ip;
1343 tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1344 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1345 /* Do not use information about IP addresses hosted
1346 * on other nodes, it may not be accurate */
1347 if (public_ips->ips[j].pnn == ctdb->nodes[i]->pnn) {
1348 tmp_ip->pnn = public_ips->ips[j].pnn;
1352 tmp_ip->addr = public_ips->ips[j].addr;
1353 tmp_ip->next = NULL;
1355 trbt_insertarray32_callback(ctdb->ip_tree,
1356 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1363 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
/*
 * Scan the node map for a node that has neither NODE_FLAGS_INACTIVE nor
 * NODE_FLAGS_DISABLED set; the result tells set_ipflags_internal()
 * whether every node is disabled.
 */
1368 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1372 for (i=0;i<nodemap->num;i++) {
1373 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1374 /* Found one completely healthy node */
/*
 * Shared state for the get-tunable callbacks below.  tunable is the
 * name being queried; get_tunable_from_nodes() also fills in the
 * members out (per-node result array) and fatal.
 */
1382 struct get_tunable_callback_data {
1383 const char *tunable;
/*
 * Per-node success callback for CTDB_CONTROL_GET_TUNABLE.
 *
 * callback is a struct get_tunable_callback_data.  The reply must be
 * exactly sizeof(uint32_t) bytes and pnn is checked against the length
 * of the cd->out array; both problems are logged.  A valid value is
 * stored in cd->out[pnn].
 */
1388 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1389 int32_t res, TDB_DATA outdata,
1392 struct get_tunable_callback_data *cd =
1393 (struct get_tunable_callback_data *)callback;
1397 /* Already handled in fail callback */
1401 if (outdata.dsize != sizeof(uint32_t)) {
1402 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1403 cd->tunable, pnn, (int)sizeof(uint32_t),
1404 (int)outdata.dsize));
1409 size = talloc_array_length(cd->out);
1411 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1412 cd->tunable, pnn, size));
1417 cd->out[pnn] = *(uint32_t *)outdata.dptr;
/*
 * Per-node failure callback for CTDB_CONTROL_GET_TUNABLE.
 *
 * callback is a struct get_tunable_callback_data.  Three outcomes are
 * logged: a timeout, a tunable the node does not implement (a warning
 * only), and any other, unexpected error.
 */
1420 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1421 int32_t res, TDB_DATA outdata,
1424 struct get_tunable_callback_data *cd =
1425 (struct get_tunable_callback_data *)callback;
1430 ("Timed out getting tunable \"%s\" from node %d\n",
1436 DEBUG(DEBUG_WARNING,
1437 ("Tunable \"%s\" not implemented on node %d\n",
1442 ("Unexpected error getting tunable \"%s\" from node %d\n",
/*
 * Read one tunable from all connected nodes.
 *
 * Allocates an array of nodemap->num values under tmp_ctx, pre-filled
 * with default_value so that nodes which do not answer keep the
 * default.  The request payload is a struct ctdb_control_get_tunable
 * carrying the NUL-terminated tunable name.  CTDB_CONTROL_GET_TUNABLE
 * is sent to the nodes from list_of_connected_nodes() with
 * TAKEOVER_TIMEOUT(); replies are handled by get_tunable_callback and
 * get_tunable_fail_callback.  callback_data.fatal is checked when the
 * async control reports failure.  The request buffer is freed before
 * returning.
 */
1448 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1449 TALLOC_CTX *tmp_ctx,
1450 struct ctdb_node_map_old *nodemap,
1451 const char *tunable,
1452 uint32_t default_value)
1455 struct ctdb_control_get_tunable *t;
1458 struct get_tunable_callback_data callback_data;
1461 tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1462 CTDB_NO_MEMORY_NULL(ctdb, tvals);
1463 for (i=0; i<nodemap->num; i++) {
1464 tvals[i] = default_value;
1467 callback_data.out = tvals;
1468 callback_data.tunable = tunable;
1469 callback_data.fatal = false;
1471 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1472 data.dptr = talloc_size(tmp_ctx, data.dsize);
1473 t = (struct ctdb_control_get_tunable *)data.dptr;
1474 t->length = strlen(tunable)+1;
1475 memcpy(t->name, tunable, t->length);
1476 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1477 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1478 nodes, 0, TAKEOVER_TIMEOUT(),
1480 get_tunable_callback,
1481 get_tunable_fail_callback,
1482 &callback_data) != 0) {
1483 if (callback_data.fatal) {
1489 talloc_free(data.dptr);
1494 /* Set internal flags for IP allocation:
1496 * Set NOIPTAKEOVER ip flags from per-node NoIPTakeover tunable
1497 * Set NOIPHOST ip flag for each INACTIVE node
1498 * if all nodes are disabled:
1499 * Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1501 * Set NOIPHOST ip flags for disabled nodes
1503 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1504 struct ctdb_node_map_old *nodemap,
1505 uint32_t *tval_noiptakeover,
1506 uint32_t *tval_noiphostonalldisabled)
1510 for (i=0;i<nodemap->num;i++) {
1511 /* Can not take IPs on node with NoIPTakeover set */
1512 if (tval_noiptakeover[i] != 0) {
1513 ipalloc_state->noiptakeover[i] = true;
1516 /* Can not host IPs on INACTIVE node */
1517 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1518 ipalloc_state->noiphost[i] = true;
1522 if (all_nodes_are_disabled(nodemap)) {
1523 /* If all nodes are disabled, can not host IPs on node
1524 * with NoIPHostOnAllDisabled set
1526 for (i=0;i<nodemap->num;i++) {
1527 if (tval_noiphostonalldisabled[i] != 0) {
1528 ipalloc_state->noiphost[i] = true;
1532 /* If some nodes are not disabled, then can not host
1533 * IPs on DISABLED node
1535 for (i=0;i<nodemap->num;i++) {
1536 if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1537 ipalloc_state->noiphost[i] = true;
/*
 * Fetch the per-node tunable values needed for IP allocation with
 * get_tunable_from_nodes() (NoIPHostOnAllDisabled is visible below; the
 * first call presumably asks for NoIPTakeover - its name argument is not
 * visible here) and pass them to set_ipflags_internal().
 *
 * Both value arrays are allocated on ipalloc_state and freed again once
 * the flags have been set.  A NULL array is treated as failure; the
 * caller interprets a false return as "Failed to set IP flags".
 */
1543 static bool set_ipflags(struct ctdb_context *ctdb,
1544 struct ipalloc_state *ipalloc_state,
1545 struct ctdb_node_map_old *nodemap)
1547 uint32_t *tval_noiptakeover;
1548 uint32_t *tval_noiphostonalldisabled;
1550 tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1552 if (tval_noiptakeover == NULL) {
1556 tval_noiphostonalldisabled =
1557 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1558 "NoIPHostOnAllDisabled", 0);
1559 if (tval_noiphostonalldisabled == NULL) {
1560 /* Caller frees tmp_ctx */
1564 set_ipflags_internal(ipalloc_state, nodemap,
1566 tval_noiphostonalldisabled);
1568 talloc_free(tval_noiptakeover);
1569 talloc_free(tval_noiphostonalldisabled);
/*
 * Allocate and initialise the IP allocation state for one takeover run.
 *
 * The state is a talloc child of mem_ctx and owns four zero-initialised
 * per-node arrays of ctdb->num_nodes entries: known_public_ips,
 * available_public_ips, noiptakeover and noiphost.
 *
 * The allocation algorithm is taken from the local tunables: LCP2 has
 * precedence over deterministic, and non-deterministic is the fallback.
 * no_ip_failback is copied from the tunables as well.
 *
 * Every allocation failure is logged and the partially built state is
 * freed; the caller checks the result against NULL.
 */
1574 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1575 TALLOC_CTX *mem_ctx)
1577 struct ipalloc_state *ipalloc_state =
1578 talloc_zero(mem_ctx, struct ipalloc_state);
1579 if (ipalloc_state == NULL) {
1580 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1584 ipalloc_state->num = ctdb->num_nodes;
1585 ipalloc_state->known_public_ips =
1586 talloc_zero_array(ipalloc_state,
1587 struct ctdb_public_ip_list_old *,
1588 ipalloc_state->num);
1589 if (ipalloc_state->known_public_ips == NULL) {
1590 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1591 talloc_free(ipalloc_state);
1594 ipalloc_state->available_public_ips =
1595 talloc_zero_array(ipalloc_state,
1596 struct ctdb_public_ip_list_old *,
1597 ipalloc_state->num);
1598 if (ipalloc_state->available_public_ips == NULL) {
1599 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1600 talloc_free(ipalloc_state);
1603 ipalloc_state->noiptakeover =
1604 talloc_zero_array(ipalloc_state,
1606 ipalloc_state->num);
1607 if (ipalloc_state->noiptakeover == NULL) {
1608 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1609 talloc_free(ipalloc_state);
1612 ipalloc_state->noiphost =
1613 talloc_zero_array(ipalloc_state,
1615 ipalloc_state->num);
1616 if (ipalloc_state->noiphost == NULL) {
1617 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1618 talloc_free(ipalloc_state);
1622 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1623 ipalloc_state->algorithm = IPALLOC_LCP2;
1624 } else if (1 == ctdb->tunable.deterministic_public_ips) {
1625 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1627 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1630 ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1632 return ipalloc_state;
/*
 * Context passed to iprealloc_fail_callback(): the caller's real failure
 * callback with its argument, and the nodemap used to look up node
 * flags.
 * NOTE(review): retry_nodes and retry_count are referenced by the users
 * of this struct, but their declarations are not visible in this
 * listing.
 */
1635 struct iprealloc_callback_data {
1638 client_async_callback fail_callback;
1639 void *fail_callback_data;
1640 struct ctdb_node_map_old *nodemap;
1643 static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1644 int32_t res, TDB_DATA outdata,
1648 struct iprealloc_callback_data *cd =
1649 (struct iprealloc_callback_data *)callback;
1651 numnodes = talloc_array_length(cd->retry_nodes);
1652 if (pnn > numnodes) {
1654 ("ipreallocated failure from node %d, "
1655 "but only %d nodes in nodemap\n",
1660 /* Can't run the "ipreallocated" event on a INACTIVE node */
1661 if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
1662 DEBUG(DEBUG_WARNING,
1663 ("ipreallocated failed on inactive node %d, ignoring\n",
1670 /* If the control timed out then that's a real error,
1671 * so call the real fail callback
1673 if (cd->fail_callback) {
1674 cd->fail_callback(ctdb, pnn, res, outdata,
1675 cd->fail_callback_data);
1677 DEBUG(DEBUG_WARNING,
1678 ("iprealloc timed out but no callback registered\n"));
1682 /* If not a timeout then either the ipreallocated
1683 * eventscript (or some setup) failed. This might
1684 * have failed because the IPREALLOCATED control isn't
1685 * implemented - right now there is no way of knowing
1686 * because the error codes are all folded down to -1.
1687 * Consider retrying using EVENTSCRIPT control...
1689 DEBUG(DEBUG_WARNING,
1690 ("ipreallocated failure from node %d, flagging retry\n",
1692 cd->retry_nodes[pnn] = true;
/*
 * Context passed to takeover_run_fail_callback(): the caller's failure
 * callback with its argument, and the nodemap used to map a PNN to an
 * index.
 * NOTE(review): node_failed (one bool per nodemap entry, allocated in
 * ctdb_takeover_run()) is used by the callback, but its declaration is
 * not visible in this listing.
 */
1697 struct takeover_callback_data {
1699 client_async_callback fail_callback;
1700 void *fail_callback_data;
1701 struct ctdb_node_map_old *nodemap;
1704 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1705 uint32_t node_pnn, int32_t res,
1706 TDB_DATA outdata, void *callback_data)
1708 struct takeover_callback_data *cd =
1709 talloc_get_type_abort(callback_data,
1710 struct takeover_callback_data);
1713 for (i = 0; i < cd->nodemap->num; i++) {
1714 if (node_pnn == cd->nodemap->nodes[i].pnn) {
1719 if (i == cd->nodemap->num) {
1720 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1724 if (!cd->node_failed[i]) {
1725 cd->node_failed[i] = true;
1726 cd->fail_callback(ctdb, node_pnn, res, outdata,
1727 cd->fail_callback_data);
/*
 * Run one complete public IP takeover pass.
 *
 *   ctdb, nodemap          cluster context and current node map
 *   force_rebalance_nodes  stored in the allocation state unchanged
 *   fail_callback,
 *   callback_data          reported to when a control to a node fails
 *
 * Visible steps, in order:
 *   1. build the allocation state, set the per-node IP flags and reload
 *      the known/available public IPs of the nodes;
 *   2. if at least one node has available IPs, merge the per-node lists
 *      into all_ips and run ipalloc();
 *   3. for every node that is not disconnected, send RELEASE_IP for each
 *      IP that node should not host; failures go through
 *      takeover_run_fail_callback();
 *   4. send TAKEOVER_IP to the node chosen for each IP whose pnn is
 *      not -1;
 *   5. send IPREALLOCATED to the connected nodes and retry the nodes
 *      flagged by iprealloc_fail_callback() with the RUN_EVENTSCRIPTS
 *      control and the "ipreallocated" argument.
 *
 * All temporary data hangs off tmp_ctx, which is freed on the error
 * paths shown and at the end.
 * NOTE(review): no NULL check of tmp_ctx (talloc_new) is visible.
 */
1732 make any IP alias changes for public addresses that are necessary
1734 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1735 uint32_t *force_rebalance_nodes,
1736 client_async_callback fail_callback, void *callback_data)
1739 struct ctdb_public_ip ip;
1741 struct public_ip_list *all_ips, *tmp_ip;
1743 struct timeval timeout;
1744 struct client_async_data *async_data;
1745 struct ctdb_client_control_state *state;
1746 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1747 struct ipalloc_state *ipalloc_state;
1748 struct takeover_callback_data *takeover_data;
1749 struct iprealloc_callback_data iprealloc_data;
1754 * ip failover is completely disabled, just send out the
1755 * ipreallocated event.
1757 if (ctdb->tunable.disable_ip_failover != 0) {
1761 ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1762 if (ipalloc_state == NULL) {
1763 talloc_free(tmp_ctx);
1767 if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1768 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1769 talloc_free(tmp_ctx);
1773 /* Fetch known/available public IPs from each active node */
1774 ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1776 talloc_free(tmp_ctx);
1780 /* Short-circuit IP allocation if no node has available IPs */
1781 can_host_ips = false;
1782 for (i=0; i < ipalloc_state->num; i++) {
1783 if (ipalloc_state->available_public_ips[i] != NULL) {
1784 can_host_ips = true;
1787 if (!can_host_ips) {
1788 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1792 /* since nodes only know about those public addresses that
1793 can be served by that particular node, no single node has
1794 a full list of all public addresses that exist in the cluster.
1795 Walk over all node structures and create a merged list of
1796 all public addresses that exist in the cluster.
1798 keep the tree of ips around as ctdb->ip_tree
1800 all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1801 ipalloc_state->all_ips = all_ips;
1803 ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1805 /* Do the IP reassignment calculations */
1806 ipalloc(ipalloc_state);
1808 /* Now tell all nodes to release any public IPs they should not
1809 * host. This will be a NOOP on nodes that don't currently
1810 * hold the given IP.
1812 takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
1813 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
1815 takeover_data->node_failed = talloc_zero_array(tmp_ctx,
1816 bool, nodemap->num);
1817 CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
1818 takeover_data->fail_callback = fail_callback;
1819 takeover_data->fail_callback_data = callback_data;
1820 takeover_data->nodemap = nodemap;
1822 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1823 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1825 async_data->fail_callback = takeover_run_fail_callback;
1826 async_data->callback_data = takeover_data;
1828 ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1830 /* Send a RELEASE_IP to all nodes that should not be hosting
1831 * each IP. For each IP, all but one of these will be
1832 * redundant. However, the redundant ones are used to tell
1833 * nodes which node should be hosting the IP so that commands
1834 * like "ctdb ip" can display a particular nodes idea of who
1835 * is hosting what. */
1836 for (i=0;i<nodemap->num;i++) {
1837 /* don't talk to unconnected nodes, but do talk to banned nodes */
1838 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1842 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1843 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1844 /* This node should be serving this
1845 vnn so don't tell it to release the ip
1849 ip.pnn = tmp_ip->pnn;
1850 ip.addr = tmp_ip->addr;
1852 timeout = TAKEOVER_TIMEOUT();
1853 data.dsize = sizeof(ip);
1854 data.dptr = (uint8_t *)&ip;
1855 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1856 0, CTDB_CONTROL_RELEASE_IP, 0,
1859 if (state == NULL) {
1860 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1861 talloc_free(tmp_ctx);
1865 ctdb_client_async_add(async_data, state);
1868 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1869 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1870 talloc_free(tmp_ctx);
1873 talloc_free(async_data);
1876 /* For each IP, send a TAKEOVER_IP to the node that should be
1877 * hosting it. Many of these will often be redundant (since
1878 * the allocation won't have changed) but they can be useful
1879 * to recover from inconsistencies. */
1880 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1881 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1883 async_data->fail_callback = fail_callback;
1884 async_data->callback_data = callback_data;
1886 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1887 if (tmp_ip->pnn == -1) {
1888 /* this IP won't be taken over */
1892 ip.pnn = tmp_ip->pnn;
1893 ip.addr = tmp_ip->addr;
1895 timeout = TAKEOVER_TIMEOUT();
1896 data.dsize = sizeof(ip);
1897 data.dptr = (uint8_t *)&ip;
1898 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1899 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1900 data, async_data, &timeout, NULL);
1901 if (state == NULL) {
1902 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1903 talloc_free(tmp_ctx);
1907 ctdb_client_async_add(async_data, state);
1909 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1910 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1911 talloc_free(tmp_ctx);
1917 * Tell all nodes to run eventscripts to process the
1918 * "ipreallocated" event. This can do a lot of things,
1919 * including restarting services to reconfigure them if public
1920 * IPs have moved. Once upon a time this event only used to
1923 retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
1924 CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
1925 iprealloc_data.retry_nodes = retry_data;
1926 iprealloc_data.retry_count = 0;
1927 iprealloc_data.fail_callback = fail_callback;
1928 iprealloc_data.fail_callback_data = callback_data;
1929 iprealloc_data.nodemap = nodemap;
1931 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1932 ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1933 nodes, 0, TAKEOVER_TIMEOUT(),
1935 NULL, iprealloc_fail_callback,
1938 /* If the control failed then we should retry to any
1939 * nodes flagged by iprealloc_fail_callback using the
1940 * EVENTSCRIPT control. This is a best-effort at
1941 * backward compatibility when running a mixed cluster
1942 * where some nodes have not yet been upgraded to
1943 * support the IPREALLOCATED control.
1945 DEBUG(DEBUG_WARNING,
1946 ("Retry ipreallocated to some nodes using eventscript control\n"));
1948 nodes = talloc_array(tmp_ctx, uint32_t,
1949 iprealloc_data.retry_count);
1950 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
1953 for (i=0; i<nodemap->num; i++) {
1954 if (iprealloc_data.retry_nodes[i]) {
1960 data.dptr = discard_const("ipreallocated");
1961 data.dsize = strlen((char *)data.dptr) + 1;
1962 ret = ctdb_client_async_control(ctdb,
1963 CTDB_CONTROL_RUN_EVENTSCRIPTS,
1964 nodes, 0, TAKEOVER_TIMEOUT(),
1966 NULL, fail_callback,
1969 DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
1973 talloc_free(tmp_ctx);
/*
 * talloc destructor for a struct ctdb_client_ip: logs the address and
 * port of the entry at debug level and unlinks it from
 * ctdb->client_ip_list.
 */
1979 destroy a ctdb_client_ip structure
1981 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1983 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1984 ctdb_addr_to_str(&ip->addr),
1985 ntohs(ip->addr.ip.sin_port),
1988 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
/*
 * Control handler: the local client identified by client_id registers a
 * TCP connection (indata carries a struct ctdb_connection) whose
 * destination is expected to be one of our public addresses.
 *
 * Visible behaviour:
 *  - nothing is done when this node has no public IPs;
 *  - source and destination are canonicalised in place;
 *  - a destination that is not a public address is only logged (an IPv4
 *    loopback destination is not logged);
 *  - a destination whose vnn is held by another node is logged as a
 *    failure, which per the comment below makes smbd die;
 *  - otherwise a ctdb_client_ip and a ctdb_tcp_list entry are allocated
 *    as talloc children of the client, linked into ctdb->client_ip_list
 *    and client->tcp_list, and the connection is broadcast to all
 *    connected nodes with CTDB_CONTROL_TCP_ADD (no reply requested).
 *
 * NOTE(review): the result of reqid_find() is dereferenced (client->pid,
 * talloc parent) without a visible NULL check, and indata.dsize is not
 * validated in the visible code - confirm both.
 */
1993 called by a client to inform us of a TCP connection that it is managing
1994 that should tickled with an ACK when IP takeover is done
1996 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1999 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
2000 struct ctdb_connection *tcp_sock = NULL;
2001 struct ctdb_tcp_list *tcp;
2002 struct ctdb_connection t;
2005 struct ctdb_client_ip *ip;
2006 struct ctdb_vnn *vnn;
2007 ctdb_sock_addr addr;
2009 /* If we don't have public IPs, tickles are useless */
2010 if (ctdb->vnn == NULL) {
2014 tcp_sock = (struct ctdb_connection *)indata.dptr;
2016 addr = tcp_sock->src;
2017 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2018 addr = tcp_sock->dst;
2019 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
2022 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
2023 vnn = find_public_ip_vnn(ctdb, &addr);
2025 switch (addr.sa.sa_family) {
2027 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2028 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2029 ctdb_addr_to_str(&addr)));
2033 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2034 ctdb_addr_to_str(&addr)));
2037 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2043 if (vnn->pnn != ctdb->pnn) {
2044 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2045 ctdb_addr_to_str(&addr),
2046 client_id, client->pid));
2047 /* failing this call will tell smbd to die */
2051 ip = talloc(client, struct ctdb_client_ip);
2052 CTDB_NO_MEMORY(ctdb, ip);
2056 ip->client_id = client_id;
2057 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2058 DLIST_ADD(ctdb->client_ip_list, ip);
2060 tcp = talloc(client, struct ctdb_tcp_list);
2061 CTDB_NO_MEMORY(ctdb, tcp);
2063 tcp->connection.src = tcp_sock->src;
2064 tcp->connection.dst = tcp_sock->dst;
2066 DLIST_ADD(client->tcp_list, tcp);
2068 t.src = tcp_sock->src;
2069 t.dst = tcp_sock->dst;
2071 data.dptr = (uint8_t *)&t;
2072 data.dsize = sizeof(t);
2074 switch (addr.sa.sa_family) {
2076 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2077 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
2078 ctdb_addr_to_str(&tcp_sock->src),
2079 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2082 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2083 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
2084 ctdb_addr_to_str(&tcp_sock->src),
2085 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2088 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2092 /* tell all nodes about this tcp connection */
2093 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2094 CTDB_CONTROL_TCP_ADD,
2095 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2097 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
/*
 * Linear search of array for an entry whose src and dst both match tcp,
 * compared with ctdb_same_sockaddr().  On a match a pointer into
 * array->connections is returned, so the caller may modify the entry in
 * place.  A NULL array is handled explicitly before the loop.
 */
2105 find a tcp address on a list
2107 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2108 struct ctdb_connection *tcp)
2112 if (array == NULL) {
2116 for (i=0;i<array->num;i++) {
2117 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2118 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2119 return &array->connections[i];
2128 called by a daemon to inform us of a TCP connection that one of its
2129 clients managing that should tickled with an ACK when IP takeover is
2132 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2134 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2135 struct ctdb_tcp_array *tcparray;
2136 struct ctdb_connection tcp;
2137 struct ctdb_vnn *vnn;
2139 /* If we don't have public IPs, tickles are useless */
2140 if (ctdb->vnn == NULL) {
2144 vnn = find_public_ip_vnn(ctdb, &p->dst);
2146 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2147 ctdb_addr_to_str(&p->dst)));
2153 tcparray = vnn->tcp_array;
2155 /* If this is the first tickle */
2156 if (tcparray == NULL) {
2157 tcparray = talloc(vnn, struct ctdb_tcp_array);
2158 CTDB_NO_MEMORY(ctdb, tcparray);
2159 vnn->tcp_array = tcparray;
2162 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2163 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2165 tcparray->connections[tcparray->num].src = p->src;
2166 tcparray->connections[tcparray->num].dst = p->dst;
2169 if (tcp_update_needed) {
2170 vnn->tcp_update_needed = true;
2176 /* Do we already have this tickle ?*/
2179 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2180 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2181 ctdb_addr_to_str(&tcp.dst),
2182 ntohs(tcp.dst.ip.sin_port),
2187 /* A new tickle, we must add it to the array */
2188 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2189 struct ctdb_connection,
2191 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2193 tcparray->connections[tcparray->num].src = p->src;
2194 tcparray->connections[tcparray->num].dst = p->dst;
2197 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2198 ctdb_addr_to_str(&tcp.dst),
2199 ntohs(tcp.dst.ip.sin_port),
2202 if (tcp_update_needed) {
2203 vnn->tcp_update_needed = true;
/*
 * Remove conn from the tickle array of vnn.
 *
 * A missing array or an unknown connection is only logged.  Otherwise
 * the slot found by ctdb_tcp_find() is overwritten with the last array
 * entry and num is decremented, so the order of entries is not
 * preserved.  When the array becomes empty it is freed and
 * vnn->tcp_array is reset to NULL.  vnn->tcp_update_needed is set
 * afterwards.
 *
 * Note that the "doesnt exist" messages print conn->dst while the final
 * "Removed" message prints conn->src.
 */
2210 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2212 struct ctdb_connection *tcpp;
2218 /* if the array is empty we cant remove it
2219 and we don't need to do anything
2221 if (vnn->tcp_array == NULL) {
2222 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2223 ctdb_addr_to_str(&conn->dst),
2224 ntohs(conn->dst.ip.sin_port)));
2229 /* See if we know this connection
2230 if we don't know this connection then we dont need to do anything
2232 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2234 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2235 ctdb_addr_to_str(&conn->dst),
2236 ntohs(conn->dst.ip.sin_port)));
2241 /* We need to remove this entry from the array.
2242 Instead of allocating a new array and copying data to it
2243 we cheat and just copy the last entry in the existing array
2244 to the entry that is to be removed and just shrink the
2247 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2248 vnn->tcp_array->num--;
2250 /* If we deleted the last entry we also need to remove the entire array
2252 if (vnn->tcp_array->num == 0) {
2253 talloc_free(vnn->tcp_array);
2254 vnn->tcp_array = NULL;
2257 vnn->tcp_update_needed = true;
2259 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2260 ctdb_addr_to_str(&conn->src),
2261 ntohs(conn->src.ip.sin_port)));
/*
 * Control handler: drop the connection described by indata (a struct
 * ctdb_connection) from the tickle list of the public address it
 * targets, using ctdb_remove_connection().  Nothing is done when this
 * node has no public IPs; a destination that is not a known public
 * address is logged.
 */
2266 called by a daemon to inform us of a TCP connection that one of its
2267 clients used are no longer needed in the tickle database
2269 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2271 struct ctdb_vnn *vnn;
2272 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2274 /* If we don't have public IPs, tickles are useless */
2275 if (ctdb->vnn == NULL) {
2279 vnn = find_public_ip_vnn(ctdb, &conn->dst);
2282 (__location__ " unable to find public address %s\n",
2283 ctdb_addr_to_str(&conn->dst)));
2287 ctdb_remove_connection(vnn, conn);
/*
 * Control handler: logs the PNN of the node that sent the startup
 * control and sets tcp_update_needed on every local vnn.
 */
2294 Called when another daemon starts - causes all tickles for all
2295 public addresses we are serving to be sent to the new node on the
2296 next check. This actually causes the next scheduled call to
2297 tdb_update_tcp_tickles() to update all nodes. This is simple and
2298 doesn't require careful error handling.
2300 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2302 struct ctdb_vnn *vnn;
2304 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2305 (unsigned long) pnn));
2307 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2308 vnn->tcp_update_needed = true;
/*
 * Drain client->tcp_list: every entry is unlinked from the list in turn.
 * For a connection whose public address is currently held by this node
 * (vnn->pnn equals our pnn) the tickle is removed from the vnn with
 * ctdb_remove_connection().  Connections to addresses hosted elsewhere
 * are deliberately left in place - see the comment at the end of the
 * loop.  A destination with no matching vnn is logged.
 */
2316 called when a client structure goes away - hook to remove
2317 elements from the tcp_list in all daemons
2319 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2321 while (client->tcp_list) {
2322 struct ctdb_vnn *vnn;
2323 struct ctdb_tcp_list *tcp = client->tcp_list;
2324 struct ctdb_connection *conn = &tcp->connection;
2326 DLIST_REMOVE(client->tcp_list, tcp);
2328 vnn = find_public_ip_vnn(client->ctdb,
2332 (__location__ " unable to find public address %s\n",
2333 ctdb_addr_to_str(&conn->dst)));
2337 /* If the IP address is hosted on this node then
2338 * remove the connection. */
2339 if (vnn->pnn == client->ctdb->pnn) {
2340 ctdb_remove_connection(vnn, conn);
2343 /* Otherwise this function has been called because the
2344 * server IP address has been released to another node
2345 * and the client has exited. This means that we
2346 * should not delete the connection information. The
2347 * takeover node processes connections too. */
/*
 * Release every public IP that is configured on this node.
 *
 * The disable_ip_failover tunable is checked first (== 1).  Then, for
 * each vnn:
 *  - if the address is not present on the system, only the interface
 *    assignment is dropped;
 *  - a vnn with an update already in flight is skipped with a warning;
 *  - otherwise update_in_flight is set, the CTDB_EVENT_RELEASE_IP event
 *    script is run with "<iface> <address> <netmask bits>", clients of
 *    the address are killed via release_kill_clients(), the interface
 *    is unassigned and update_in_flight is cleared again.
 *
 * The number of released addresses is logged at the end.
 */
2352 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2354 struct ctdb_vnn *vnn;
2357 if (ctdb->tunable.disable_ip_failover == 1) {
2361 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2362 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2363 ctdb_vnn_unassign_iface(ctdb, vnn);
2370 /* Don't allow multiple releases at once. Some code,
2371 * particularly ctdb_tickle_sentenced_connections() is
2373 if (vnn->update_in_flight) {
2374 DEBUG(DEBUG_WARNING,
2376 " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2377 ctdb_addr_to_str(&vnn->public_address),
2378 vnn->public_netmask_bits,
2379 ctdb_vnn_iface_string(vnn)));
2382 vnn->update_in_flight = true;
2384 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2385 ctdb_addr_to_str(&vnn->public_address),
2386 vnn->public_netmask_bits,
2387 ctdb_vnn_iface_string(vnn)));
2389 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2390 ctdb_vnn_iface_string(vnn),
2391 ctdb_addr_to_str(&vnn->public_address),
2392 vnn->public_netmask_bits);
2393 release_kill_clients(ctdb, &vnn->public_address);
2394 ctdb_vnn_unassign_iface(ctdb, vnn);
2395 vnn->update_in_flight = false;
2399 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
/*
 * Control handler: return this node's public IP list in outdata as a
 * struct ctdb_public_ip_list_old allocated on outdata.
 *
 * With CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE set in c->flags, vnns for
 * which ctdb_vnn_available() is false are left out.  The buffer is
 * sized for all vnns; outdata->dsize is then recomputed from the number
 * of entries actually filled in, so it can be smaller than the
 * allocation.
 */
2404 get list of public IPs
2406 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2407 struct ctdb_req_control_old *c, TDB_DATA *outdata)
2410 struct ctdb_public_ip_list_old *ips;
2411 struct ctdb_vnn *vnn;
2412 bool only_available = false;
2414 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2415 only_available = true;
2418 /* count how many public ip structures we have */
2420 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2424 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2425 num*sizeof(struct ctdb_public_ip);
2426 ips = talloc_zero_size(outdata, len);
2427 CTDB_NO_MEMORY(ctdb, ips);
2430 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2431 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2434 ips->ips[i].pnn = vnn->pnn;
2435 ips->ips[i].addr = vnn->public_address;
2439 len = offsetof(struct ctdb_public_ip_list_old, ips) +
2440 i*sizeof(struct ctdb_public_ip);
2442 outdata->dsize = len;
2443 outdata->dptr = (uint8_t *)ips;
/*
 * Control handler: describe one public address (indata is a
 * ctdb_sock_addr).
 *
 * outdata receives a struct ctdb_public_ip_info_old allocated on
 * outdata, holding the address, its pnn and one ctdb_iface entry per
 * name in the NULL terminated vnn->ifaces[] list.  active_idx is the
 * index of the interface currently assigned to the vnn, or 0xFFFFFFFF
 * when none of them matches.
 *
 * The lookup falls back to ctdb->single_ip_vnn; an address that is
 * still unknown is logged as an error.  An interface name that
 * ctdb_find_iface() cannot resolve is logged as an internal error.
 *
 * Names are copied with strncpy(size - 1) into zeroed memory, so they
 * are always NUL terminated.
 */
2449 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2450 struct ctdb_req_control_old *c,
2455 ctdb_sock_addr *addr;
2456 struct ctdb_public_ip_info_old *info;
2457 struct ctdb_vnn *vnn;
2459 addr = (ctdb_sock_addr *)indata.dptr;
2461 vnn = find_public_ip_vnn(ctdb, addr);
2463 /* if it is not a public ip it could be our 'single ip' */
2464 if (ctdb->single_ip_vnn) {
2465 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2466 vnn = ctdb->single_ip_vnn;
2471 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2472 "'%s'not a public address\n",
2473 ctdb_addr_to_str(addr)));
2477 /* count how many public ip structures we have */
2479 for (;vnn->ifaces[num];) {
2483 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2484 num*sizeof(struct ctdb_iface);
2485 info = talloc_zero_size(outdata, len);
2486 CTDB_NO_MEMORY(ctdb, info);
2488 info->ip.addr = vnn->public_address;
2489 info->ip.pnn = vnn->pnn;
2490 info->active_idx = 0xFFFFFFFF;
2492 for (i=0; vnn->ifaces[i]; i++) {
2493 struct ctdb_interface *cur;
2495 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2497 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2501 if (vnn->iface == cur) {
2502 info->active_idx = i;
2504 strncpy(info->ifaces[i].name, cur->name, sizeof(info->ifaces[i].name)-1);
2505 info->ifaces[i].link_state = cur->link_up;
2506 info->ifaces[i].references = cur->references;
2509 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2510 i*sizeof(struct ctdb_iface);
2512 outdata->dsize = len;
2513 outdata->dptr = (uint8_t *)info;
2518 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2519 struct ctdb_req_control_old *c,
2523 struct ctdb_iface_list_old *ifaces;
2524 struct ctdb_interface *cur;
2526 /* count how many public ip structures we have */
2528 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2532 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2533 num*sizeof(struct ctdb_iface);
2534 ifaces = talloc_zero_size(outdata, len);
2535 CTDB_NO_MEMORY(ctdb, ifaces);
2538 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2539 strcpy(ifaces->ifaces[i].name, cur->name);
2540 ifaces->ifaces[i].link_state = cur->link_up;
2541 ifaces->ifaces[i].references = cur->references;
2545 len = offsetof(struct ctdb_iface_list_old, ifaces) +
2546 i*sizeof(struct ctdb_iface);
2548 outdata->dsize = len;
2549 outdata->dptr = (uint8_t *)ifaces;
/*
 * Control handler: set the link state of a local interface.
 *
 * indata is a struct ctdb_iface.  The request is validated before use:
 * the name must be terminated at index CTDB_IFACE_SIZE, link_state must
 * be one of the accepted values and references must be 0.  The
 * interface is then looked up by name with ctdb_find_iface().
 *
 * A change of state is logged - at ERR level when the link was up
 * before, at NOTICE level otherwise - and stored in iface->link_up.
 * When the requested state equals the current one, nothing is changed.
 *
 * NOTE(review): name[CTDB_IFACE_SIZE] is read, which presumes the name
 * array has more than CTDB_IFACE_SIZE elements - confirm against the
 * struct declaration.
 */
2554 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2555 struct ctdb_req_control_old *c,
2558 struct ctdb_iface *info;
2559 struct ctdb_interface *iface;
2560 bool link_up = false;
2562 info = (struct ctdb_iface *)indata.dptr;
2564 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2565 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2566 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2567 len, len, info->name));
2571 switch (info->link_state) {
2579 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2580 (unsigned int)info->link_state));
2584 if (info->references != 0) {
2585 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2586 (unsigned int)info->references));
2590 iface = ctdb_find_iface(ctdb, info->name);
2591 if (iface == NULL) {
2595 if (link_up == iface->link_up) {
2599 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2600 ("iface[%s] has changed it's link status %s => %s\n",
2602 iface->link_up?"up":"down",
2603 link_up?"up":"down"));
2605 iface->link_up = link_up;
/*
 * Per-vnn state for killing TCP connections: back pointers to the vnn
 * and the ctdb context, the tevent fd handler and the tree of
 * connections that still have to be reset.
 * NOTE(review): capture_fd and private_data are used by
 * capture_tcp_handler(), but their declarations are not visible in this
 * listing.
 */
2611 structure containing the listening socket and the list of tcp connections
2612 that the ctdb daemon is to kill
2614 struct ctdb_kill_tcp {
2615 struct ctdb_vnn *vnn;
2616 struct ctdb_context *ctdb;
2618 struct tevent_fd *fde;
2619 trbt_tree_t *connections;
/*
 * One entry of the kill list: the address pair identifying the
 * connection and a back pointer to the owning ctdb_kill_tcp.
 * NOTE(review): a "count" member is read by
 * tickle_connection_traverse(), but its declaration is not visible in
 * this listing.
 */
2624 a tcp connection that is to be killed
2626 struct ctdb_killtcp_con {
2627 ctdb_sock_addr src_addr;
2628 ctdb_sock_addr dst_addr;
2630 struct ctdb_kill_tcp *killtcp;
/*
 * Build the KILLTCP_KEYLEN-word tree key for a src/dst socket pair.
 *
 * Layout: the branch using the "ip" member fills words 0-3 (dst
 * address, src address, dst port, src port) and leaves the rest zero;
 * the branch using the "ip6" member fills all ten words, interleaving
 * the four 32-bit address words of dst and src from last to first,
 * followed by the two ports.  Values are stored as found in the
 * sockaddr, without byte order conversion.
 *
 * NOTE(review): the key lives in a function-local static buffer, so the
 * returned pointer is overwritten by the next call and must be consumed
 * immediately.  The ip6 branch reads s6_addr through uint32_t pointers.
 * Mismatching or unknown address families are only logged in the
 * visible code.
 */
2633 /* this function is used to create a key to represent this socketpair
2634 in the killtcp tree.
2635 this key is used to insert and lookup matching socketpairs that are
2636 to be tickled and RST
2638 #define KILLTCP_KEYLEN 10
2639 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2641 static uint32_t key[KILLTCP_KEYLEN];
2643 bzero(key, sizeof(key));
2645 if (src->sa.sa_family != dst->sa.sa_family) {
2646 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2650 switch (src->sa.sa_family) {
2652 key[0] = dst->ip.sin_addr.s_addr;
2653 key[1] = src->ip.sin_addr.s_addr;
2654 key[2] = dst->ip.sin_port;
2655 key[3] = src->ip.sin_port;
2658 uint32_t *dst6_addr32 =
2659 (uint32_t *)&(dst->ip6.sin6_addr.s6_addr);
2660 uint32_t *src6_addr32 =
2661 (uint32_t *)&(src->ip6.sin6_addr.s6_addr);
2662 key[0] = dst6_addr32[3];
2663 key[1] = src6_addr32[3];
2664 key[2] = dst6_addr32[2];
2665 key[3] = src6_addr32[2];
2666 key[4] = dst6_addr32[1];
2667 key[5] = src6_addr32[1];
2668 key[6] = dst6_addr32[0];
2669 key[7] = src6_addr32[0];
2670 key[8] = dst->ip6.sin6_port;
2671 key[9] = src->ip6.sin6_port;
2675 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
/*
 * tevent fd handler for the capture socket of a ctdb_kill_tcp
 * (private_data).
 *
 * Events without TEVENT_FD_READ are ignored.  A packet is read with
 * ctdb_sys_read_tcp_packet(); packets that cannot be parsed are
 * dropped.  The address pair of the packet is looked up in
 * killtcp->connections, and for a known connection a TCP packet is sent
 * with ctdb_sys_send_tcp() using the captured ack_seq and seq values.
 *
 * NOTE(review): the comment below says the connection is also removed
 * from the list; that statement is not visible in this listing.
 */
2683 called when we get a read event on the raw socket
2685 static void capture_tcp_handler(struct tevent_context *ev,
2686 struct tevent_fd *fde,
2687 uint16_t flags, void *private_data)
2689 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2690 struct ctdb_killtcp_con *con;
2691 ctdb_sock_addr src, dst;
2692 uint32_t ack_seq, seq;
2694 if (!(flags & TEVENT_FD_READ)) {
2698 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2699 killtcp->private_data,
2701 &ack_seq, &seq) != 0) {
2702 /* probably a non-tcp ACK packet */
2706 /* check if we have this guy in our list of connections
2709 con = trbt_lookuparray32(killtcp->connections,
2710 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2712 /* no this was some other packet we can just ignore */
2716 /* This one has been tickled !
2717 now reset him and remove him from the list.
2719 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2720 ntohs(con->dst_addr.ip.sin_port),
2721 ctdb_addr_to_str(&con->src_addr),
2722 ntohs(con->src_addr.ip.sin_port)));
2724 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
/*
 * Tree traverse callback, run once per connection that is still waiting
 * to be killed.  data is the struct ctdb_killtcp_con; param is the
 * talloc context collecting entries to delete.
 *
 * A connection whose count has reached 5 is given up on: it is
 * reparented to param so that the caller can free it once the traversal
 * is over, because entries must not be deleted during the traverse.
 * Otherwise the connection is tickled again (the call itself is only
 * partly visible in this listing).
 */
2729 /* when traversing the list of all tcp connections to send tickle acks to
2730 (so that we can capture the ack coming back and kill the connection
2732 this callback is called for each connection we are currently trying to kill
2734 static int tickle_connection_traverse(void *param, void *data)
2736 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2738 /* have tried too many times, just give up */
2739 if (con->count >= 5) {
2740 /* can't delete in traverse: reparent to delete_cons */
2741 talloc_steal(param, con);
2745 /* otherwise, try tickling it again */
2748 (ctdb_sock_addr *)&con->dst_addr,
2749 (ctdb_sock_addr *)&con->src_addr,
/*
 * Timer handler for a ctdb_kill_tcp (private_data).
 *
 * Traverses killtcp->connections with tickle_connection_traverse(),
 * using a temporary talloc context to collect the connections that were
 * given up on, and frees that context after the traversal.  When the
 * tree is missing or empty the whole killtcp structure is freed;
 * otherwise the handler re-arms itself on killtcp->ctdb->ev for one
 * second later.
 *
 * NOTE(review): neither the result of talloc_new(NULL) nor that of
 * tevent_add_timer() is checked in the visible code.
 */
2756 called every second until all sentenced connections have been reset
2758 static void ctdb_tickle_sentenced_connections(struct tevent_context *ev,
2759 struct tevent_timer *te,
2760 struct timeval t, void *private_data)
2762 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2763 void *delete_cons = talloc_new(NULL);
2765 /* loop over all connections sending tickle ACKs */
2766 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2768 /* now we've finished traverse, it's safe to do deletion. */
2769 talloc_free(delete_cons);
2771 /* If there are no more connections to kill we can remove the
2772 entire killtcp structure
2774 if ( (killtcp->connections == NULL) ||
2775 (killtcp->connections->root == NULL) ) {
2776 talloc_free(killtcp);
2780 /* try tickling them again in a seconds time
2782 tevent_add_timer(killtcp->ctdb->ev, killtcp,
2783 timeval_current_ofs(1, 0),
2784 ctdb_tickle_sentenced_connections, killtcp);
// ctdb_killtcp_destructor: talloc destructor for struct ctdb_kill_tcp
// (installed by ctdb_killtcp_add_connection()).
// Clears the owning vnn's killtcp back-pointer, but only after checking that
// the vnn is still on ctdb->vnn and still points at this structure.
// NOTE(review): the bodies of the guard branches and the return values are not
// present in this listing.
2788 destroy the killtcp structure
2790 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2792 struct ctdb_vnn *tmpvnn;
2794 /* verify that this vnn is still active */
2795 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2796 if (tmpvnn == killtcp->vnn) {
// Loop ran off the end of the list: the vnn is no longer known.
2801 if (tmpvnn == NULL) {
// The vnn already refers to a different killtcp structure.
2805 if (killtcp->vnn->killtcp != killtcp) {
2809 killtcp->vnn->killtcp = NULL;
// add_killtcp_callback: insert callback handed to trbt_insertarray32_callback()
// by ctdb_killtcp_add_connection().
//   parm - the new connection structure supplied by the caller of the insert
//   data - presumably the entry already stored under the same key, if any
//          (TODO confirm against the rb-tree API)
// NOTE(review): the function body is not present in this listing.
2815 /* nothing fancy here, just unconditionally replace any existing
2816 connection structure with the new one.
2818 don't even free the old one if it did exist, that one is talloc_stolen
2819 by the same node in the tree anyway and will be deleted when the new data
2822 static void *add_killtcp_callback(void *parm, void *data)
// ctdb_killtcp_add_connection: queue one TCP connection for a forced reset.
//   ctdb - daemon context
//   s, d - source and destination addresses of the connection (their
//          declarations are on lines missing from this listing)
// Steps visible below: canonicalize both addresses, find the vnn that owns one
// of them, lazily create vnn->killtcp, store a ctdb_killtcp_con in its tree,
// and on first use open the capture socket, register the fd handler and start
// the one-second tickle timer.
// NOTE(review): return statements, some conditions and the failure label are
// not present in this listing.
2828 add a tcp socket to the list of connections we want to RST
2830 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2834 ctdb_sock_addr src, dst;
2835 struct ctdb_kill_tcp *killtcp;
2836 struct ctdb_killtcp_con *con;
2837 struct ctdb_vnn *vnn;
2839 ctdb_canonicalize_ip(s, &src);
2840 ctdb_canonicalize_ip(d, &dst);
// Look the vnn up by destination first, then by source.
// NOTE(review): the condition guarding the second lookup is not visible here.
2842 vnn = find_public_ip_vnn(ctdb, &dst);
2844 vnn = find_public_ip_vnn(ctdb, &src);
2847 /* if it is not a public ip it could be our 'single ip' */
2848 if (ctdb->single_ip_vnn) {
2849 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2850 vnn = ctdb->single_ip_vnn;
2855 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2859 killtcp = vnn->killtcp;
2861 /* If this is the first connection to kill we must allocate
2864 if (killtcp == NULL) {
// The structure is a talloc child of the vnn and is zero-initialised.
2865 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
2866 CTDB_NO_MEMORY(ctdb, killtcp);
2869 killtcp->ctdb = ctdb;
// -1 marks "capture socket not opened yet"; tested further down.
2870 killtcp->capture_fd = -1;
2871 killtcp->connections = trbt_create(killtcp, 0);
2873 vnn->killtcp = killtcp;
2874 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2879 /* create a structure that describes this connection we want to
2880 RST and store it in killtcp->connections
2882 con = talloc(killtcp, struct ctdb_killtcp_con);
2883 CTDB_NO_MEMORY(ctdb, con);
2884 con->src_addr = src;
2885 con->dst_addr = dst;
2887 con->killtcp = killtcp;
// The key is built as (dst, src). NOTE(review): presumably so that a captured
// reply, which travels in the opposite direction, maps to the same key - confirm.
2890 trbt_insertarray32_callback(killtcp->connections,
2891 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2892 add_killtcp_callback, con);
2895 If we don't have a socket to listen on yet we must create it
2897 if (killtcp->capture_fd == -1) {
2898 const char *iface = ctdb_vnn_iface_string(vnn);
2899 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2900 if (killtcp->capture_fd == -1) {
2901 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2902 "socket on iface '%s' for killtcp (%s)\n",
2903 iface, strerror(errno)));
// First connection for this killtcp: hook the capture fd into the event loop.
2909 if (killtcp->fde == NULL) {
2910 killtcp->fde = tevent_add_fd(ctdb->ev, killtcp,
2911 killtcp->capture_fd,
2913 capture_tcp_handler, killtcp);
// The fd event now owns the descriptor and closes it when it is freed.
2914 tevent_fd_set_auto_close(killtcp->fde);
2916 /* We also need to set up some events to tickle all these connections
2917 until they are all reset
2919 tevent_add_timer(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2920 ctdb_tickle_sentenced_connections, killtcp);
2923 /* tickle him once now */
// Cleanup that discards the whole killtcp structure.
// NOTE(review): the label and return around it are not visible in this listing.
2932 talloc_free(vnn->killtcp);
2933 vnn->killtcp = NULL;
// ctdb_control_kill_tcp: control entry point. Interprets indata.dptr as a
// struct ctdb_connection and registers its src/dst pair through
// ctdb_killtcp_add_connection(), returning that function's result.
// NOTE(review): no indata.dsize check is visible in this function - confirm
// that the size is validated by the caller.
2938 kill a TCP connection.
2940 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2942 struct ctdb_connection *killtcp = (struct ctdb_connection *)indata.dptr;
2944 return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
// ctdb_control_set_tcp_tickle_list: replace the stored tickle list of one
// public address with the list carried in indata.
//   indata - a struct ctdb_tickle_list_old followed by list->num connections
// The blob size is validated in two steps, the update is ignored when this
// node itself hosts the address, and otherwise vnn->tcp_array is freed and
// rebuilt as a copy of the received connections.
// NOTE(review): return statements are not present in this listing.
2948 called by a daemon to inform us of the entire list of TCP tickles for
2949 a particular public address.
2950 this control should only be sent by the node that is currently serving
2951 that public address.
2953 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2955 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2956 struct ctdb_tcp_array *tcparray;
2957 struct ctdb_vnn *vnn;
2959 /* We must at least have tickles.num or else we cant verify the size
2960 of the received data blob
2962 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2963 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2967 /* verify that the size of data matches what we expect */
// NOTE(review): list->num comes from the received blob and the product below
// has no explicit overflow check - confirm the operand types make this safe.
2968 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2969 + sizeof(struct ctdb_connection) * list->num) {
2970 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2974 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2975 ctdb_addr_to_str(&list->addr)));
2977 vnn = find_public_ip_vnn(ctdb, &list->addr);
2979 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2980 ctdb_addr_to_str(&list->addr)));
// This node hosts the address itself, so the received list is redundant.
2985 if (vnn->pnn == ctdb->pnn) {
2987 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2988 ctdb_addr_to_str(&list->addr)));
2992 /* remove any old ticklelist we might have */
2993 talloc_free(vnn->tcp_array);
2994 vnn->tcp_array = NULL;
2996 tcparray = talloc(vnn, struct ctdb_tcp_array);
2997 CTDB_NO_MEMORY(ctdb, tcparray);
2999 tcparray->num = list->num;
3001 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
// NOTE(review): if CTDB_NO_MEMORY returns early here, tcparray stays allocated
// under vnn without being referenced - confirm.
3002 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3004 memcpy(tcparray->connections, &list->connections[0],
3005 sizeof(struct ctdb_connection)*tcparray->num);
3007 /* We now have a new fresh tickle list array for this vnn */
3008 vnn->tcp_array = tcparray;
3014 called to return the full list of tickles for the public address associated
3015 with the provided vnn
// ctdb_control_get_tcp_tickle_list: build a struct ctdb_tickle_list_old for
// the public address passed in indata and return it through outdata.
//   indata  - interpreted as a ctdb_sock_addr
//   outdata - receives a buffer of header plus num connections, allocated
//             with outdata itself as talloc parent
// NOTE(review): this listing omits lines between reading vnn->tcp_array and
// using tcparray->num, as well as the code that fills the list header and the
// return statements.
3017 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3019 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3020 struct ctdb_tickle_list_old *list;
3021 struct ctdb_tcp_array *tcparray;
3023 struct ctdb_vnn *vnn;
3025 vnn = find_public_ip_vnn(ctdb, addr);
3027 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3028 ctdb_addr_to_str(addr)));
3033 tcparray = vnn->tcp_array;
3035 num = tcparray->num;
// Output size: fixed header up to the connections member, then num entries.
3040 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
3041 + sizeof(struct ctdb_connection) * num;
3043 outdata->dptr = talloc_size(outdata, outdata->dsize);
3044 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3045 list = (struct ctdb_tickle_list_old *)outdata->dptr;
3050 memcpy(&list->connections[0], tcparray->connections,
3051 sizeof(struct ctdb_connection) * num);
// ctdb_send_set_tcp_tickles_for_ip: broadcast the tickle list of one public
// address to all nodes.
//   addr     - the public address the list belongs to
//   tcparray - the connections to send
// Packs the connections into a temporary struct ctdb_tickle_list_old, sends it
// as CTDB_CONTROL_SET_TCP_TICKLE_LIST to CTDB_BROADCAST_ALL without asking for
// a reply, then frees the temporary buffer.
// NOTE(review): local declarations, the header assignments and the return
// statements are not present in this listing.
3059 set the list of all tcp tickles for a public address
3061 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
3062 ctdb_sock_addr *addr,
3063 struct ctdb_tcp_array *tcparray)
3067 struct ctdb_tickle_list_old *list;
3070 num = tcparray->num;
3075 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
3076 sizeof(struct ctdb_connection) * num;
3077 data.dptr = talloc_size(ctdb, data.dsize);
3078 CTDB_NO_MEMORY(ctdb, data.dptr);
3080 list = (struct ctdb_tickle_list_old *)data.dptr;
3084 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
// CTDB_CTRL_FLAG_NOREPLY with NULL callback arguments: no answer is awaited.
3087 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
3088 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3089 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3091 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3095 talloc_free(data.dptr);
// ctdb_update_tcp_tickles: periodic tevent timer handler.
//   private_data - the struct ctdb_context
// For every vnn whose pnn equals this node's pnn and whose tcp_update_needed
// flag is set, sends the tickle list to the other nodes, then re-arms itself
// after tunable.tickle_update_interval seconds.
// NOTE(review): the bodies of the two skip conditions and the success/failure
// branching around the DEBUG calls are not present in this listing.
3102 perform tickle updates if required
3104 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
3105 struct tevent_timer *te,
3106 struct timeval t, void *private_data)
3108 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3110 struct ctdb_vnn *vnn;
3112 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3113 /* we only send out updates for public addresses that
3116 if (ctdb->pnn != vnn->pnn) {
3119 /* We only send out the updates if we need to */
3120 if (!vnn->tcp_update_needed) {
3123 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
3124 &vnn->public_address,
3127 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3128 ctdb_addr_to_str(&vnn->public_address)));
3131 ("Sent tickle update for public address %s\n",
3132 ctdb_addr_to_str(&vnn->public_address)));
3133 vnn->tcp_update_needed = false;
// Re-arm under tickle_update_context, the same context used for the first timer.
3137 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3138 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3139 ctdb_update_tcp_tickles, ctdb);
// ctdb_start_tcp_tickle_update: create ctdb->tickle_update_context as a talloc
// child of ctdb and schedule the first ctdb_update_tcp_tickles() run after
// tunable.tickle_update_interval seconds.
// NOTE(review): the result of talloc_new() is not checked in the visible lines.
3143 start periodic update of tcp tickles
3145 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3147 ctdb->tickle_update_context = talloc_new(ctdb);
3149 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
3150 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3151 ctdb_update_tcp_tickles, ctdb);
// State carried between repeated gratuitous ARP transmissions.
// NOTE(review): send_gratious_arp() also reads arp->iface and arp->count;
// those member declarations are not present in this listing.
3157 struct control_gratious_arp {
// Daemon context; its event loop is used to schedule the repeats.
3158 struct ctdb_context *ctdb;
// Address that is announced.
3159 ctdb_sock_addr addr;
// send_gratious_arp: tevent timer handler that sends one ARP for arp->addr on
// arp->iface and re-arms itself CTDB_ARP_INTERVAL seconds later.
//   private_data - the struct control_gratious_arp describing the announcement
// The repeat stops once arp->count equals CTDB_ARP_REPEAT.
// NOTE(review): the count increment, the branch bodies and any freeing of arp
// are not present in this listing.
3165 send a control_gratuitous arp
3167 static void send_gratious_arp(struct tevent_context *ev,
3168 struct tevent_timer *te,
3169 struct timeval t, void *private_data)
3172 struct control_gratious_arp *arp = talloc_get_type(private_data,
3173 struct control_gratious_arp);
3175 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3177 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3178 arp->iface, strerror(errno)));
3183 if (arp->count == CTDB_ARP_REPEAT) {
// The timer is a talloc child of arp, so it is freed together with arp.
3188 tevent_add_timer(arp->ctdb->ev, arp,
3189 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3190 send_gratious_arp, arp);
// ctdb_control_send_gratious_arp: control entry point that starts a series of
// gratuitous ARPs.
//   indata - a struct ctdb_addr_info_old followed by len bytes of interface name
// Validates the blob size, copies address and interface name into a freshly
// allocated struct control_gratious_arp and schedules send_gratious_arp() to
// run immediately.
// NOTE(review): the head of the second size comparison, the return statements
// and the assignments on the missing lines are not visible in this listing.
3197 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3199 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
3200 struct control_gratious_arp *arp;
3202 /* verify the size of indata */
3203 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3204 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3205 (unsigned)indata.dsize,
3206 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
3210 ( offsetof(struct ctdb_addr_info_old, iface)
3211 + gratious_arp->len ) ){
3213 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3214 "but should be %u bytes\n",
3215 (unsigned)indata.dsize,
3216 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
3221 arp = talloc(ctdb, struct control_gratious_arp);
3222 CTDB_NO_MEMORY(ctdb, arp);
3225 arp->addr = gratious_arp->addr;
// NOTE(review): talloc_strdup() relies on the received interface name being
// NUL-terminated; no explicit terminator check is visible here.
3226 arp->iface = talloc_strdup(arp, gratious_arp->iface);
// NOTE(review): if CTDB_NO_MEMORY returns early here, arp stays allocated
// under ctdb - confirm.
3227 CTDB_NO_MEMORY(ctdb, arp->iface);
// Zero timeout: the first ARP goes out on the next event loop iteration.
3230 tevent_add_timer(arp->ctdb->ev, arp,
3231 timeval_zero(), send_gratious_arp, arp);
// ctdb_control_add_public_address: control entry point that adds one public
// address to this node.
//   indata - a struct ctdb_addr_info_old followed by the interface string
// Validates the blob size, logs the address and hands address, mask and
// interface string to ctdb_add_public_address().
// NOTE(review): the head of the second size comparison, the result check and
// the return statements are not present in this listing.
3236 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3238 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3241 /* verify the size of indata */
3242 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3243 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3247 ( offsetof(struct ctdb_addr_info_old, iface)
3250 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3251 "but should be %u bytes\n",
3252 (unsigned)indata.dsize,
3253 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3257 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
3259 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
3262 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
// State handed to delete_ip_callback() while a release of the address is in
// flight.
3269 struct delete_ip_callback_state {
// The original control request; it is answered from delete_ip_callback().
3270 struct ctdb_req_control_old *c;
// delete_ip_callback: completion callback for the release started by
// ctdb_control_del_public_address().
//   status, errormsg - forwarded unchanged into the reply for state->c
//   private_data     - the struct delete_ip_callback_state, freed at the end
// NOTE(review): the declaration of private_data sits on a line missing from
// this listing.
3274 called when releaseip event finishes for del_public_address
3276 static void delete_ip_callback(struct ctdb_context *ctdb,
3277 int32_t status, TDB_DATA data,
3278 const char *errormsg,
3281 struct delete_ip_callback_state *state =
3282 talloc_get_type(private_data, struct delete_ip_callback_state);
3284 /* If release failed then fail. */
3285 ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
3286 talloc_free(private_data);
// ctdb_control_del_public_address: control entry point that removes one
// public address from this node.
//   c           - the request; kept for a deferred reply when the address is
//                 currently hosted here
//   indata      - a struct ctdb_addr_info_old describing the address
//   async_reply - set to true when the reply is sent later from
//                 delete_ip_callback()
// If this node hosts the address, it is flagged delete_pending and a
// CTDB_CONTROL_RELEASE_IP control is sent first; otherwise the address is
// deleted directly with do_delete_ip().
// NOTE(review): several lines (local declarations, control arguments, error
// cleanup and return statements) are not present in this listing.
3289 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
3290 struct ctdb_req_control_old *c,
3291 TDB_DATA indata, bool *async_reply)
3293 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
3294 struct ctdb_vnn *vnn;
3296 /* verify the size of indata */
3297 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
3298 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
3302 ( offsetof(struct ctdb_addr_info_old, iface)
3305 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3306 "but should be %u bytes\n",
3307 (unsigned)indata.dsize,
3308 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
3312 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
3314 /* walk over all public addresses until we find a match */
3315 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3316 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
// Address is hosted on this node: it has to be released before deletion.
3317 if (vnn->pnn == ctdb->pnn) {
3318 struct delete_ip_callback_state *state;
3319 struct ctdb_public_ip *ip;
3323 vnn->delete_pending = true;
3325 state = talloc(ctdb,
3326 struct delete_ip_callback_state);
3327 CTDB_NO_MEMORY(ctdb, state);
// The payload is a talloc child of state, so it is freed together with state.
3330 ip = talloc(state, struct ctdb_public_ip);
3333 (__location__ " Out of memory\n"));
3338 ip->addr = pub->addr;
3340 data.dsize = sizeof(struct ctdb_public_ip);
3341 data.dptr = (unsigned char *)ip;
3343 ret = ctdb_daemon_send_control(ctdb,
3346 CTDB_CONTROL_RELEASE_IP,
3353 (__location__ "Unable to send "
3354 "CTDB_CONTROL_RELEASE_IP\n"));
// Take ownership of the request so it survives until the callback replies.
3359 state->c = talloc_steal(state, c);
3360 *async_reply = true;
3362 /* This IP is not hosted on the
3363 * current node so just delete it
3365 do_delete_ip(ctdb, vnn);
3372 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
3373 ctdb_addr_to_str(&pub->addr)));
// State handed to ctdb_ipreallocated_callback() while the event script runs.
3378 struct ipreallocated_callback_state {
// The original control request; it is answered from the callback.
3379 struct ctdb_req_control_old *c;
// ctdb_ipreallocated_callback: completion callback of the "ipreallocated"
// event script.
//   status - script result, forwarded into the reply for state->c
//   p      - the struct ipreallocated_callback_state
// A status of -ETIME leads to ctdb_ban_self().
// NOTE(review): the enclosing failure check, the rest of the DEBUG call and
// any freeing of the state are not present in this listing.
3382 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
3383 int status, void *p)
3385 struct ipreallocated_callback_state *state =
3386 talloc_get_type(p, struct ipreallocated_callback_state);
3390 (" \"ipreallocated\" event script failed (status %d)\n",
3392 if (status == -ETIME) {
3393 ctdb_ban_self(ctdb);
3397 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
// ctdb_control_ipreallocated: control entry point that runs the
// CTDB_EVENT_IPREALLOCATED event script.
//   c           - the request; kept for a deferred reply
//   async_reply - set to true because the reply is sent from
//                 ctdb_ipreallocated_callback()
// NOTE(review): the last parameter declaration, the remaining event-script
// arguments, the failure cleanup and the return statements are not present in
// this listing.
3401 /* A control to run the ipreallocated event */
3402 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
3403 struct ctdb_req_control_old *c,
3407 struct ipreallocated_callback_state *state;
3409 state = talloc(ctdb, struct ipreallocated_callback_state);
3410 CTDB_NO_MEMORY(ctdb, state);
3412 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
// state serves both as the second argument and as the callback's private data.
3414 ret = ctdb_event_script_callback(ctdb, state,
3415 ctdb_ipreallocated_callback, state,
3416 CTDB_EVENT_IPREALLOCATED,
3420 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3425 /* tell the control that we will reply asynchronously */
3426 state->c = talloc_steal(state, c);
3427 *async_reply = true;
// verify_remote_ip_allocation: compare the IP list reported by a node with the
// expected assignment stored in ctdb->ip_tree.
//   ips - the public IP list reported by the remote node
//   pnn - number of the reporting node, used in the log messages (its
//         declaration is on a line missing from this listing)
// Entries where either side has pnn -1 are not compared further.
// NOTE(review): branch bodies and return values are not present in this listing.
3433 /* This function is called from the recovery daemon to verify that a remote
3434 node has the expected ip allocation.
3435 This is verified against ctdb->ip_tree
3437 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3438 struct ctdb_public_ip_list_old *ips,
3441 struct public_ip_list *tmp_ip;
3444 if (ctdb->ip_tree == NULL) {
3445 /* don't know the expected allocation yet, assume remote node
3454 for (i=0; i<ips->num; i++) {
3455 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3456 if (tmp_ip == NULL) {
3457 DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
// Either the expected or the reported owner is unset (-1).
3461 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3465 if (tmp_ip->pnn != ips->ips[i].pnn) {
3467 ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3469 ctdb_addr_to_str(&ips->ips[i].addr),
3470 ips->ips[i].pnn, tmp_ip->pnn));
// update_ip_assignment_tree: record in ctdb->ip_tree that ip->addr is now
// assigned to node ip->pnn.
// Nothing is updated when tunable.disable_ip_failover is non-zero, when the
// tree does not exist yet, or when the address has no entry in the tree.
// NOTE(review): the return values of the individual branches are not present
// in this listing.
3478 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3480 struct public_ip_list *tmp_ip;
3482 /* IP tree is never built if DisableIPFailover is set */
3483 if (ctdb->tunable.disable_ip_failover != 0) {
3487 if (ctdb->ip_tree == NULL) {
3488 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3492 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3493 if (tmp_ip == NULL) {
3494 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
// Log old and new owner before overwriting the stored pnn.
3498 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3499 tmp_ip->pnn = ip->pnn;
// clear_ip_assignment_tree: release ctdb->ip_tree through TALLOC_FREE.
3504 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3506 TALLOC_FREE(ctdb->ip_tree);
// Bookkeeping for one running "reload public IPs" child process.
// NOTE(review): the functions below also use h->status, h->fd[] and h->child;
// those member declarations are not present in this listing.
3509 struct ctdb_reloadips_handle {
3510 struct ctdb_context *ctdb;
// The control request that is answered when the handle is destroyed.
3511 struct ctdb_req_control_old *c;
// fd event watching the read end of the pipe from the child.
3515 struct tevent_fd *fde;
// ctdb_reloadips_destructor: talloc destructor for a reloadips handle.
// Clears ctdb->reload_ips if it still points at this handle, answers the
// pending control with h->status and sends SIGKILL to the child process.
// NOTE(review): the return statement is not present in this listing.
3518 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3520 if (h == h->ctdb->reload_ips) {
3521 h->ctdb->reload_ips = NULL;
3524 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3527 ctdb_kill(h->ctdb, h->child, SIGKILL);
// ctdb_reloadips_timeout_event: tevent timer handler scheduled by
// ctdb_control_reload_public_ips() 120 seconds after the child is started.
//   private_data - the struct ctdb_reloadips_handle
// NOTE(review): the statements that act on the handle are not present in this
// listing.
3531 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3532 struct tevent_timer *te,
3533 struct timeval t, void *private_data)
3535 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
// ctdb_reloadips_child_handler: fd event handler for the pipe from the
// reloadips child.
//   private_data - the struct ctdb_reloadips_handle
// Reads the single result byte written by the child; a short read or a
// non-zero byte is logged as a child failure.
// NOTE(review): local declarations and the statements that store the result or
// free the handle are not present in this listing.
3540 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3541 struct tevent_fd *fde,
3542 uint16_t flags, void *private_data)
3544 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3549 ret = sys_read(h->fd[0], &res, 1);
3550 if (ret < 1 || res != 0) {
3551 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
// ctdb_reloadips_child: work done inside the forked reloadips child.
// Steps visible below:
//   1. fetch the public IPs the local daemon currently knows,
//   2. re-read the public addresses file into ctdb->vnn,
//   3. queue a DEL_PUBLIC_IP control for every daemon IP that is no longer in
//      the file,
//   4. queue an ADD_PUBLIC_IP control for every file entry the daemon does not
//      know yet,
//   5. wait for all queued controls to finish.
// All temporary allocations except the interface string hang off mem_ctx.
// NOTE(review): several lines (local declarations, some call arguments, loop
// exits, labels and return statements) are not present in this listing.
3559 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3561 TALLOC_CTX *mem_ctx = talloc_new(NULL);
3562 struct ctdb_public_ip_list_old *ips;
3563 struct ctdb_vnn *vnn;
3564 struct client_async_data *async_data;
3565 struct timeval timeout;
3567 struct ctdb_client_control_state *state;
3571 CTDB_NO_MEMORY(ctdb, mem_ctx);
3573 /* Read IPs from local node */
3574 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3575 CTDB_CURRENT_NODE, mem_ctx, &ips);
3578 ("Unable to fetch public IPs from local node\n"));
3579 talloc_free(mem_ctx);
3583 /* Read IPs file - this is safe since this is a child process */
3585 if (ctdb_set_public_addresses(ctdb, false) != 0) {
3586 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3587 talloc_free(mem_ctx);
3591 async_data = talloc_zero(mem_ctx, struct client_async_data);
// NOTE(review): if CTDB_NO_MEMORY returns early here and below, mem_ctx is not
// freed on that path - confirm whether that matters in the child.
3592 CTDB_NO_MEMORY(ctdb, async_data);
3594 /* Compare IPs between node and file for IPs to be deleted */
3595 for (i = 0; i < ips->num; i++) {
3597 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3598 if (ctdb_same_ip(&vnn->public_address,
3599 &ips->ips[i].addr)) {
3600 /* IP is still in file */
3606 /* Delete IP ips->ips[i] */
3607 struct ctdb_addr_info_old *pub;
3610 ("IP %s no longer configured, deleting it\n",
3611 ctdb_addr_to_str(&ips->ips[i].addr)));
3613 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3614 CTDB_NO_MEMORY(ctdb, pub);
3616 pub->addr = ips->ips[i].addr;
3620 timeout = TAKEOVER_TIMEOUT();
3622 data.dsize = offsetof(struct ctdb_addr_info_old,
3624 data.dptr = (uint8_t *)pub;
3626 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3627 CTDB_CONTROL_DEL_PUBLIC_IP,
3628 0, data, async_data,
3630 if (state == NULL) {
3633 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
// Track the request so the wait at the end covers it.
3637 ctdb_client_async_add(async_data, state);
3641 /* Compare IPs between node and file for IPs to be added */
3643 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3644 for (i = 0; i < ips->num; i++) {
3645 if (ctdb_same_ip(&vnn->public_address,
3646 &ips->ips[i].addr)) {
3647 /* IP already on node */
// Inner loop finished without a match: the daemon does not know this address.
3651 if (i == ips->num) {
3652 /* Add IP vnn->public_address (no entry in ips matched) */
3653 struct ctdb_addr_info_old *pub;
3654 const char *ifaces = NULL;
3659 ("New IP %s configured, adding it\n",
3660 ctdb_addr_to_str(&vnn->public_address)));
// Broadcast this node's pnn with CTDB_SRVID_REBALANCE_NODE.
// NOTE(review): the condition guarding this broadcast is not visible here.
3662 uint32_t pnn = ctdb_get_pnn(ctdb);
3664 data.dsize = sizeof(pnn);
3665 data.dptr = (uint8_t *)&pnn;
3667 ret = ctdb_client_send_message(
3669 CTDB_BROADCAST_CONNECTED,
3670 CTDB_SRVID_REBALANCE_NODE,
3673 DEBUG(DEBUG_WARNING,
3674 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
// Build a comma-separated list of all interfaces configured for this vnn.
3680 ifaces = vnn->ifaces[0];
3682 while (vnn->ifaces[iface] != NULL) {
// NOTE(review): the string is allocated under vnn rather than mem_ctx and the
// result is not checked for NULL in the visible lines.
3683 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3684 vnn->ifaces[iface]);
// len includes the terminating NUL.
3688 len = strlen(ifaces) + 1;
3689 pub = talloc_zero_size(mem_ctx,
3690 offsetof(struct ctdb_addr_info_old, iface) + len);
3691 CTDB_NO_MEMORY(ctdb, pub);
3693 pub->addr = vnn->public_address;
3694 pub->mask = vnn->public_netmask_bits;
3696 memcpy(&pub->iface[0], ifaces, pub->len);
3698 timeout = TAKEOVER_TIMEOUT();
3700 data.dsize = offsetof(struct ctdb_addr_info_old,
3702 data.dptr = (uint8_t *)pub;
3704 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3705 CTDB_CONTROL_ADD_PUBLIC_IP,
3706 0, data, async_data,
3708 if (state == NULL) {
3711 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3715 ctdb_client_async_add(async_data, state);
// Block until every queued add/delete control has completed.
3719 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3720 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
// Two exit paths, both releasing mem_ctx; the returns and the label between
// them are not present in this listing.
3724 talloc_free(mem_ctx);
3728 talloc_free(mem_ctx);
3732 /* This control is sent to force the node to re-read the public addresses file
3733 and drop any addresses we should no longer host, and add new addresses
3734 that we are now able to host
// ctdb_control_reload_public_ips: control entry point that reloads the public
// addresses file in a forked child.
//   c           - the request; stolen onto the handle and answered when the
//                 handle is destroyed
//   async_reply - set to true because the reply is deferred
// Any previous reload handle is freed first. The child runs
// ctdb_reloadips_child() and writes one result byte to the pipe; the parent
// watches the read end and arms a 120 second timeout.
// NOTE(review): handle initialisation, error cleanup, the child's exit and the
// return statements are not present in this listing.
3736 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3738 struct ctdb_reloadips_handle *h;
// Remembered so the child can later probe whether the parent still exists.
3739 pid_t parent = getpid();
// Freeing an existing handle runs its destructor.
3741 if (ctdb->reload_ips != NULL) {
3742 talloc_free(ctdb->reload_ips);
3743 ctdb->reload_ips = NULL;
3746 h = talloc(ctdb, struct ctdb_reloadips_handle);
3747 CTDB_NO_MEMORY(ctdb, h);
3752 if (pipe(h->fd) == -1) {
// NOTE(review): the message names ctdb_freeze_lock although this is the
// reloadips path - looks like a copy/paste leftover, confirm.
3753 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3758 h->child = ctdb_fork(ctdb);
3759 if (h->child == (pid_t)-1) {
3760 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
// Child process.
3768 if (h->child == 0) {
3769 signed char res = 0;
3772 debug_extra = talloc_asprintf(NULL, "reloadips:");
3774 prctl_set_comment("ctdb_reloadips");
3775 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3776 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3779 res = ctdb_reloadips_child(ctdb);
3781 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
// Report the result byte to the parent through the pipe.
3785 sys_write(h->fd[1], &res, 1);
3786 /* make sure we die when our parent dies */
// Signal 0 only probes for existence; loop until the parent is gone (ESRCH).
3787 while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
// Parent process from here on.
3793 h->c = talloc_steal(h, c);
3796 set_close_on_exec(h->fd[0]);
3798 talloc_set_destructor(h, ctdb_reloadips_destructor);
3801 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3802 ctdb_reloadips_child_handler, (void *)h);
3803 tevent_fd_set_auto_close(h->fde);
// Timeout handler fires if the child has not finished within 120 seconds.
3805 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3806 ctdb_reloadips_timeout_event, h);
3808 /* we reply later */
3809 *async_reply = true;