4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
45 #include "server/ipalloc.h"
47 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
49 #define CTDB_ARP_INTERVAL 1
50 #define CTDB_ARP_REPEAT 3
52 struct ctdb_interface {
53 struct ctdb_interface *prev, *next;
59 struct vnn_interface {
60 struct vnn_interface *prev, *next;
61 struct ctdb_interface *iface;
64 /* state associated with a public ip address */
66 struct ctdb_vnn *prev, *next;
68 struct ctdb_interface *iface;
69 struct vnn_interface *ifaces;
70 ctdb_sock_addr public_address;
71 uint8_t public_netmask_bits;
73 /* the node number that is serving this public address, if any.
74 If no node serves this ip it is set to -1 */
77 /* List of clients to tickle for this public address */
78 struct ctdb_tcp_array *tcp_array;
80 /* whether we need to update the other nodes with changes to our list
81 of connected clients */
82 bool tcp_update_needed;
84 /* a context to hang sending gratuitous arp events off */
85 TALLOC_CTX *takeover_ctx;
87 /* Set to true any time an update to this VNN is in flight.
88 This helps to avoid races. */
89 bool update_in_flight;
91 /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92 * address then this flag is set. It will be deleted in the
93 * release IP callback. */
97 static const char *iface_string(const struct ctdb_interface *iface)
99 return (iface != NULL ? iface->name : "__none__");
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
104 return iface_string(vnn->iface);
107 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
113 struct ctdb_interface *i;
115 if (strlen(iface) > CTDB_IFACE_SIZE) {
116 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
120 /* Verify that we don't have an entry for this interface yet */
121 i = ctdb_find_iface(ctdb, iface);
126 /* create a new structure for this interface */
127 i = talloc_zero(ctdb, struct ctdb_interface);
129 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
132 i->name = talloc_strdup(i, iface);
133 if (i->name == NULL) {
134 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
141 DLIST_ADD(ctdb->ifaces, i);
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147 const struct ctdb_interface *iface)
149 struct vnn_interface *i;
151 for (i = vnn->ifaces; i != NULL; i = i->next) {
152 if (iface == i->iface) {
160 /* If any interfaces now have no possible IPs then delete them. This
161 * implementation is naive (i.e. simple) rather than clever
162 * (i.e. complex). Given that this is run on delip and that operation
163 * is rare, this doesn't need to be efficient - it needs to be
164 * foolproof. One alternative is reference counting, where the logic
165 * is distributed and can, therefore, be broken in multiple places.
166 * Another alternative is to build a red-black tree of interfaces that
167 * can have addresses (by walking ctdb->vnn once) and then walking
168 * ctdb->ifaces once and deleting those not in the tree. Let's go to
169 * one of those if the naive implementation causes problems... :-)
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172 struct ctdb_vnn *vnn)
174 struct ctdb_interface *i, *next;
176 /* For each interface, check if there's an IP using it. */
177 for (i = ctdb->ifaces; i != NULL; i = next) {
182 /* Only consider interfaces named in the given VNN. */
183 if (!vnn_has_interface(vnn, i)) {
187 /* Search for a vnn with this interface. */
189 for (tv=ctdb->vnn; tv; tv=tv->next) {
190 if (vnn_has_interface(tv, i)) {
197 /* None of the VNNs are using this interface. */
198 DLIST_REMOVE(ctdb->ifaces, i);
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
208 struct ctdb_interface *i;
210 for (i=ctdb->ifaces;i;i=i->next) {
211 if (strcmp(i->name, iface) == 0) {
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220 struct ctdb_vnn *vnn)
222 struct vnn_interface *i;
223 struct ctdb_interface *cur = NULL;
224 struct ctdb_interface *best = NULL;
226 for (i = vnn->ifaces; i != NULL; i = i->next) {
239 if (cur->references < best->references) {
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249 struct ctdb_vnn *vnn)
251 struct ctdb_interface *best = NULL;
254 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255 "still assigned to iface '%s'\n",
256 ctdb_addr_to_str(&vnn->public_address),
257 ctdb_vnn_iface_string(vnn)));
261 best = ctdb_vnn_best_iface(ctdb, vnn);
263 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264 "cannot assign to iface any iface\n",
265 ctdb_addr_to_str(&vnn->public_address)));
271 vnn->pnn = ctdb->pnn;
273 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274 "now assigned to iface '%s' refs[%d]\n",
275 ctdb_addr_to_str(&vnn->public_address),
276 ctdb_vnn_iface_string(vnn),
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282 struct ctdb_vnn *vnn)
284 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285 "now unassigned (old iface '%s' refs[%d])\n",
286 ctdb_addr_to_str(&vnn->public_address),
287 ctdb_vnn_iface_string(vnn),
288 vnn->iface?vnn->iface->references:0));
290 vnn->iface->references--;
293 if (vnn->pnn == ctdb->pnn) {
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299 struct ctdb_vnn *vnn)
301 struct vnn_interface *i;
303 /* Nodes that are not RUNNING can not host IPs */
304 if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
308 if (vnn->delete_pending) {
312 if (vnn->iface && vnn->iface->link_up) {
316 for (i = vnn->ifaces; i != NULL; i = i->next) {
317 if (i->iface->link_up) {
325 struct ctdb_takeover_arp {
326 struct ctdb_context *ctdb;
329 struct ctdb_tcp_array *tcparray;
330 struct ctdb_vnn *vnn;
335 lists of tcp endpoints
337 struct ctdb_tcp_list {
338 struct ctdb_tcp_list *prev, *next;
339 struct ctdb_connection connection;
343 list of clients to kill on IP release
345 struct ctdb_client_ip {
346 struct ctdb_client_ip *prev, *next;
347 struct ctdb_context *ctdb;
354 send a gratuitous arp
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357 struct tevent_timer *te,
358 struct timeval t, void *private_data)
360 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
361 struct ctdb_takeover_arp);
363 struct ctdb_tcp_array *tcparray;
364 const char *iface = ctdb_vnn_iface_string(arp->vnn);
366 ret = ctdb_sys_send_arp(&arp->addr, iface);
368 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369 iface, strerror(errno)));
372 tcparray = arp->tcparray;
374 for (i=0;i<tcparray->num;i++) {
375 struct ctdb_connection *tcon;
377 tcon = &tcparray->connections[i];
378 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379 (unsigned)ntohs(tcon->dst.ip.sin_port),
380 ctdb_addr_to_str(&tcon->src),
381 (unsigned)ntohs(tcon->src.ip.sin_port)));
382 ret = ctdb_sys_send_tcp(
387 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388 ctdb_addr_to_str(&tcon->src)));
395 if (arp->count == CTDB_ARP_REPEAT) {
400 tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402 ctdb_control_send_arp, arp);
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406 struct ctdb_vnn *vnn)
408 struct ctdb_takeover_arp *arp;
409 struct ctdb_tcp_array *tcparray;
411 if (!vnn->takeover_ctx) {
412 vnn->takeover_ctx = talloc_new(vnn);
413 if (!vnn->takeover_ctx) {
418 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
424 arp->addr = vnn->public_address;
427 tcparray = vnn->tcp_array;
429 /* add all of the known tcp connections for this IP to the
430 list of tcp connections to send tickle acks for */
431 arp->tcparray = talloc_steal(arp, tcparray);
433 vnn->tcp_array = NULL;
434 vnn->tcp_update_needed = true;
437 tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438 timeval_zero(), ctdb_control_send_arp, arp);
443 struct ctdb_do_takeip_state {
444 struct ctdb_req_control_old *c;
445 struct ctdb_vnn *vnn;
449 called when takeip event finishes
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
454 struct ctdb_do_takeip_state *state =
455 talloc_get_type(private_data, struct ctdb_do_takeip_state);
460 if (status == -ETIME) {
463 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464 ctdb_addr_to_str(&state->vnn->public_address),
465 ctdb_vnn_iface_string(state->vnn)));
466 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
472 if (ctdb->do_checkpublicip) {
474 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
476 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
483 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484 data.dsize = strlen((char *)data.dptr) + 1;
485 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
487 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
490 /* the control succeeded */
491 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
498 state->vnn->update_in_flight = false;
503 take over an ip address
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506 struct ctdb_req_control_old *c,
507 struct ctdb_vnn *vnn)
510 struct ctdb_do_takeip_state *state;
512 if (vnn->update_in_flight) {
513 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514 "update for this IP already in flight\n",
515 ctdb_addr_to_str(&vnn->public_address),
516 vnn->public_netmask_bits));
520 ret = ctdb_vnn_assign_iface(ctdb, vnn);
522 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523 "assign a usable interface\n",
524 ctdb_addr_to_str(&vnn->public_address),
525 vnn->public_netmask_bits));
529 state = talloc(vnn, struct ctdb_do_takeip_state);
530 CTDB_NO_MEMORY(ctdb, state);
535 vnn->update_in_flight = true;
536 talloc_set_destructor(state, ctdb_takeip_destructor);
538 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539 ctdb_addr_to_str(&vnn->public_address),
540 vnn->public_netmask_bits,
541 ctdb_vnn_iface_string(vnn)));
543 ret = ctdb_event_script_callback(ctdb,
545 ctdb_do_takeip_callback,
549 ctdb_vnn_iface_string(vnn),
550 ctdb_addr_to_str(&vnn->public_address),
551 vnn->public_netmask_bits);
554 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555 ctdb_addr_to_str(&vnn->public_address),
556 ctdb_vnn_iface_string(vnn)));
561 state->c = talloc_steal(ctdb, c);
565 struct ctdb_do_updateip_state {
566 struct ctdb_req_control_old *c;
567 struct ctdb_interface *old;
568 struct ctdb_vnn *vnn;
572 called when updateip event finishes
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
577 struct ctdb_do_updateip_state *state =
578 talloc_get_type(private_data, struct ctdb_do_updateip_state);
581 if (status == -ETIME) {
585 ("Failed update of IP %s from interface %s to %s\n",
586 ctdb_addr_to_str(&state->vnn->public_address),
587 iface_string(state->old),
588 ctdb_vnn_iface_string(state->vnn)));
591 * All we can do is reset the old interface
592 * and let the next run fix it
594 ctdb_vnn_unassign_iface(ctdb, state->vnn);
595 state->vnn->iface = state->old;
596 state->vnn->iface->references++;
598 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
603 /* the control succeeded */
604 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
609 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
611 state->vnn->update_in_flight = false;
616 update (move) an ip address
618 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
619 struct ctdb_req_control_old *c,
620 struct ctdb_vnn *vnn)
623 struct ctdb_do_updateip_state *state;
624 struct ctdb_interface *old = vnn->iface;
625 const char *old_name = iface_string(old);
626 const char *new_name;
628 if (vnn->update_in_flight) {
629 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
630 "update for this IP already in flight\n",
631 ctdb_addr_to_str(&vnn->public_address),
632 vnn->public_netmask_bits));
636 ctdb_vnn_unassign_iface(ctdb, vnn);
637 ret = ctdb_vnn_assign_iface(ctdb, vnn);
639 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
640 "assign a usable interface (old iface '%s')\n",
641 ctdb_addr_to_str(&vnn->public_address),
642 vnn->public_netmask_bits,
647 if (old == vnn->iface) {
648 /* A benign update from one interface onto itself.
649 * no need to run the eventscripts in this case, just return
652 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
656 state = talloc(vnn, struct ctdb_do_updateip_state);
657 CTDB_NO_MEMORY(ctdb, state);
663 vnn->update_in_flight = true;
664 talloc_set_destructor(state, ctdb_updateip_destructor);
666 new_name = ctdb_vnn_iface_string(vnn);
667 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
668 "interface %s to %s\n",
669 ctdb_addr_to_str(&vnn->public_address),
670 vnn->public_netmask_bits,
674 ret = ctdb_event_script_callback(ctdb,
676 ctdb_do_updateip_callback,
678 CTDB_EVENT_UPDATE_IP,
682 ctdb_addr_to_str(&vnn->public_address),
683 vnn->public_netmask_bits);
686 ("Failed update IP %s from interface %s to %s\n",
687 ctdb_addr_to_str(&vnn->public_address),
688 old_name, new_name));
693 state->c = talloc_steal(ctdb, c);
698 Find the vnn of the node that has a public ip address
699 returns NULL if the address is not known as a public address
701 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
703 struct ctdb_vnn *vnn;
705 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
706 if (ctdb_same_ip(&vnn->public_address, addr)) {
715 take over an ip address
717 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
718 struct ctdb_req_control_old *c,
723 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
724 struct ctdb_vnn *vnn;
725 bool have_ip = false;
726 bool do_updateip = false;
727 bool do_takeip = false;
728 struct ctdb_interface *best_iface = NULL;
730 if (pip->pnn != ctdb->pnn) {
731 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
732 "with pnn %d, but we're node %d\n",
733 ctdb_addr_to_str(&pip->addr),
734 pip->pnn, ctdb->pnn));
738 /* update our vnn list */
739 vnn = find_public_ip_vnn(ctdb, &pip->addr);
741 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
742 ctdb_addr_to_str(&pip->addr)));
746 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
747 have_ip = ctdb_sys_have_ip(&pip->addr);
749 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
750 if (best_iface == NULL) {
751 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
752 "a usable interface (old %s, have_ip %d)\n",
753 ctdb_addr_to_str(&vnn->public_address),
754 vnn->public_netmask_bits,
755 ctdb_vnn_iface_string(vnn),
760 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
761 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762 "and we have it on iface[%s], but it was assigned to node %d"
763 "and we are node %d, banning ourself\n",
764 ctdb_addr_to_str(&vnn->public_address),
765 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
770 if (vnn->pnn == -1 && have_ip) {
771 /* This will cause connections to be reset and
772 * reestablished. However, this is a very unusual
773 * situation and doing this will completely repair the
774 * inconsistency in the VNN.
778 " Doing updateip for IP %s already on an interface\n",
779 ctdb_addr_to_str(&vnn->public_address)));
784 if (vnn->iface != best_iface) {
785 if (!vnn->iface->link_up) {
787 } else if (vnn->iface->references > (best_iface->references + 1)) {
788 /* only move when the rebalance gains something */
796 ctdb_vnn_unassign_iface(ctdb, vnn);
803 ret = ctdb_do_takeip(ctdb, c, vnn);
807 } else if (do_updateip) {
808 ret = ctdb_do_updateip(ctdb, c, vnn);
814 * The interface is up and the kernel knows the ip
817 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn)));
824 /* tell ctdb_control.c that we will be replying asynchronously */
830 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
832 DLIST_REMOVE(ctdb->vnn, vnn);
833 ctdb_vnn_unassign_iface(ctdb, vnn);
834 ctdb_remove_orphaned_ifaces(ctdb, vnn);
838 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
839 struct ctdb_vnn *vnn,
840 ctdb_sock_addr *addr)
844 /* Send a message to all clients of this node telling them
845 * that the cluster has been reconfigured and they should
846 * close any connections on this IP address
848 data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
849 data.dsize = strlen((char *)data.dptr)+1;
850 DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
851 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
853 ctdb_vnn_unassign_iface(ctdb, vnn);
855 /* Process the IP if it has been marked for deletion */
856 if (vnn->delete_pending) {
857 do_delete_ip(ctdb, vnn);
864 struct release_ip_callback_state {
865 struct ctdb_req_control_old *c;
866 ctdb_sock_addr *addr;
867 struct ctdb_vnn *vnn;
872 called when releaseip event finishes
874 static void release_ip_callback(struct ctdb_context *ctdb, int status,
877 struct release_ip_callback_state *state =
878 talloc_get_type(private_data, struct release_ip_callback_state);
880 if (status == -ETIME) {
884 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
885 if (ctdb_sys_have_ip(state->addr)) {
887 ("IP %s still hosted during release IP callback, failing\n",
888 ctdb_addr_to_str(state->addr)));
889 ctdb_request_control_reply(ctdb, state->c,
896 state->vnn->pnn = state->target_pnn;
897 state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
899 /* the control succeeded */
900 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
904 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
906 if (state->vnn != NULL) {
907 state->vnn->update_in_flight = false;
913 release an ip address
915 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
916 struct ctdb_req_control_old *c,
921 struct release_ip_callback_state *state;
922 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
923 struct ctdb_vnn *vnn;
926 /* update our vnn list */
927 vnn = find_public_ip_vnn(ctdb, &pip->addr);
929 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
930 ctdb_addr_to_str(&pip->addr)));
934 /* stop any previous arps */
935 talloc_free(vnn->takeover_ctx);
936 vnn->takeover_ctx = NULL;
938 /* RELEASE_IP controls are sent to all nodes that should not
939 * be hosting a particular IP. This serves 2 purposes. The
940 * first is to help resolve any inconsistencies. If a node
941 * does unexpectedly host an IP then it will be released. The
942 * 2nd is to use a "redundant release" to tell non-takeover
943 * nodes where an IP is moving to. This is how "ctdb ip" can
944 * report the (likely) location of an IP by only asking the
945 * local node. Redundant releases need to update the PNN but
946 * are otherwise ignored.
948 if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
949 if (!ctdb_sys_have_ip(&pip->addr)) {
950 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
951 ctdb_addr_to_str(&pip->addr),
952 vnn->public_netmask_bits,
953 ctdb_vnn_iface_string(vnn)));
955 ctdb_vnn_unassign_iface(ctdb, vnn);
959 if (vnn->iface == NULL) {
960 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
961 ctdb_addr_to_str(&pip->addr),
962 vnn->public_netmask_bits));
968 /* There is a potential race between take_ip and us because we
969 * update the VNN via a callback that runs when the
970 * eventscripts have been run. Avoid the race by allowing one
971 * update to be in flight at a time.
973 if (vnn->update_in_flight) {
974 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
975 "update for this IP already in flight\n",
976 ctdb_addr_to_str(&vnn->public_address),
977 vnn->public_netmask_bits));
981 iface = ctdb_vnn_iface_string(vnn);
983 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
984 ctdb_addr_to_str(&pip->addr),
985 vnn->public_netmask_bits,
989 state = talloc(ctdb, struct release_ip_callback_state);
991 ctdb_set_error(ctdb, "Out of memory at %s:%d",
997 state->addr = talloc(state, ctdb_sock_addr);
998 if (state->addr == NULL) {
999 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1000 __FILE__, __LINE__);
1004 *state->addr = pip->addr;
1005 state->target_pnn = pip->pnn;
1008 vnn->update_in_flight = true;
1009 talloc_set_destructor(state, ctdb_releaseip_destructor);
1011 ret = ctdb_event_script_callback(ctdb,
1012 state, release_ip_callback, state,
1013 CTDB_EVENT_RELEASE_IP,
1016 ctdb_addr_to_str(&pip->addr),
1017 vnn->public_netmask_bits);
1019 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1020 ctdb_addr_to_str(&pip->addr),
1021 ctdb_vnn_iface_string(vnn)));
1026 /* tell the control that we will be replying asynchronously */
1027 *async_reply = true;
1028 state->c = talloc_steal(state, c);
1032 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1033 ctdb_sock_addr *addr,
1034 unsigned mask, const char *ifaces,
1037 struct ctdb_vnn *vnn;
1041 /* Verify that we don't have an entry for this IP yet */
1042 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1043 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1045 ("Duplicate public IP address '%s'\n",
1046 ctdb_addr_to_str(addr)));
1051 /* Create a new VNN structure for this IP address */
1052 vnn = talloc_zero(ctdb, struct ctdb_vnn);
1054 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1057 tmp = talloc_strdup(vnn, ifaces);
1059 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1063 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1064 struct vnn_interface *vnn_iface;
1065 struct ctdb_interface *i;
1066 if (!ctdb_sys_check_iface_exists(iface)) {
1068 ("Unknown interface %s for public address %s\n",
1069 iface, ctdb_addr_to_str(addr)));
1074 i = ctdb_add_local_iface(ctdb, iface);
1077 ("Failed to add interface '%s' "
1078 "for public address %s\n",
1079 iface, ctdb_addr_to_str(addr)));
1084 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1085 if (vnn_iface == NULL) {
1086 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1091 vnn_iface->iface = i;
1092 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1095 vnn->public_address = *addr;
1096 vnn->public_netmask_bits = mask;
1099 DLIST_ADD(ctdb->vnn, vnn);
1105 setup the public address lists from a file
1107 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1113 lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1114 if (lines == NULL) {
1115 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1118 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1122 for (i=0;i<nlines;i++) {
1124 ctdb_sock_addr addr;
1125 const char *addrstr;
1130 while ((*line == ' ') || (*line == '\t')) {
1136 if (strcmp(line, "") == 0) {
1139 tok = strtok(line, " \t");
1141 tok = strtok(NULL, " \t");
1143 if (NULL == ctdb->default_public_interface) {
1144 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1149 ifaces = ctdb->default_public_interface;
1154 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1155 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1159 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1160 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1172 destroy a ctdb_client_ip structure
1174 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1176 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1177 ctdb_addr_to_str(&ip->addr),
1178 ntohs(ip->addr.ip.sin_port),
1181 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1186 called by a client to inform us of a TCP connection that it is managing
1187 that should be tickled with an ACK when IP takeover is done
1189 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1192 struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1193 struct ctdb_connection *tcp_sock = NULL;
1194 struct ctdb_tcp_list *tcp;
1195 struct ctdb_connection t;
1198 struct ctdb_client_ip *ip;
1199 struct ctdb_vnn *vnn;
1200 ctdb_sock_addr addr;
1202 /* If we don't have public IPs, tickles are useless */
1203 if (ctdb->vnn == NULL) {
1207 tcp_sock = (struct ctdb_connection *)indata.dptr;
1209 addr = tcp_sock->src;
1210 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1211 addr = tcp_sock->dst;
1212 ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1215 memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1216 vnn = find_public_ip_vnn(ctdb, &addr);
1218 switch (addr.sa.sa_family) {
1220 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1221 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1222 ctdb_addr_to_str(&addr)));
1226 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1227 ctdb_addr_to_str(&addr)));
1230 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1236 if (vnn->pnn != ctdb->pnn) {
1237 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1238 ctdb_addr_to_str(&addr),
1239 client_id, client->pid));
1240 /* failing this call will tell smbd to die */
1244 ip = talloc(client, struct ctdb_client_ip);
1245 CTDB_NO_MEMORY(ctdb, ip);
1249 ip->client_id = client_id;
1250 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1251 DLIST_ADD(ctdb->client_ip_list, ip);
1253 tcp = talloc(client, struct ctdb_tcp_list);
1254 CTDB_NO_MEMORY(ctdb, tcp);
1256 tcp->connection.src = tcp_sock->src;
1257 tcp->connection.dst = tcp_sock->dst;
1259 DLIST_ADD(client->tcp_list, tcp);
1261 t.src = tcp_sock->src;
1262 t.dst = tcp_sock->dst;
1264 data.dptr = (uint8_t *)&t;
1265 data.dsize = sizeof(t);
1267 switch (addr.sa.sa_family) {
1269 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1270 (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1271 ctdb_addr_to_str(&tcp_sock->src),
1272 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1275 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1276 (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1277 ctdb_addr_to_str(&tcp_sock->src),
1278 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1281 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1285 /* tell all nodes about this tcp connection */
1286 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1287 CTDB_CONTROL_TCP_ADD,
1288 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1290 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1298 find a tcp address on a list
1300 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1301 struct ctdb_connection *tcp)
1305 if (array == NULL) {
1309 for (i=0;i<array->num;i++) {
1310 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1311 ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1312 return &array->connections[i];
1321 called by a daemon to inform us of a TCP connection that one of its
1322 clients is managing that should be tickled with an ACK when IP takeover is
1325 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1327 struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1328 struct ctdb_tcp_array *tcparray;
1329 struct ctdb_connection tcp;
1330 struct ctdb_vnn *vnn;
1332 /* If we don't have public IPs, tickles are useless */
1333 if (ctdb->vnn == NULL) {
1337 vnn = find_public_ip_vnn(ctdb, &p->dst);
1339 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1340 ctdb_addr_to_str(&p->dst)));
1346 tcparray = vnn->tcp_array;
1348 /* If this is the first tickle */
1349 if (tcparray == NULL) {
1350 tcparray = talloc(vnn, struct ctdb_tcp_array);
1351 CTDB_NO_MEMORY(ctdb, tcparray);
1352 vnn->tcp_array = tcparray;
1355 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1356 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1358 tcparray->connections[tcparray->num].src = p->src;
1359 tcparray->connections[tcparray->num].dst = p->dst;
1362 if (tcp_update_needed) {
1363 vnn->tcp_update_needed = true;
1369 /* Do we already have this tickle ?*/
1372 if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1373 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1374 ctdb_addr_to_str(&tcp.dst),
1375 ntohs(tcp.dst.ip.sin_port),
1380 /* A new tickle, we must add it to the array */
1381 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1382 struct ctdb_connection,
1384 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1386 tcparray->connections[tcparray->num].src = p->src;
1387 tcparray->connections[tcparray->num].dst = p->dst;
1390 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1391 ctdb_addr_to_str(&tcp.dst),
1392 ntohs(tcp.dst.ip.sin_port),
1395 if (tcp_update_needed) {
1396 vnn->tcp_update_needed = true;
1403 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1405 struct ctdb_connection *tcpp;
1411 /* if the array is empty we can't remove it
1412 and we don't need to do anything
1414 if (vnn->tcp_array == NULL) {
1415 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1416 ctdb_addr_to_str(&conn->dst),
1417 ntohs(conn->dst.ip.sin_port)));
1422 /* See if we know this connection
1423 if we don't know this connection then we don't need to do anything
1425 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1427 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1428 ctdb_addr_to_str(&conn->dst),
1429 ntohs(conn->dst.ip.sin_port)));
1434 /* We need to remove this entry from the array.
1435 Instead of allocating a new array and copying data to it
1436 we cheat and just copy the last entry in the existing array
1437 to the entry that is to be removed and just shrink the
1440 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1441 vnn->tcp_array->num--;
1443 /* If we deleted the last entry we also need to remove the entire array
1445 if (vnn->tcp_array->num == 0) {
1446 talloc_free(vnn->tcp_array);
1447 vnn->tcp_array = NULL;
1450 vnn->tcp_update_needed = true;
1452 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1453 ctdb_addr_to_str(&conn->src),
1454 ntohs(conn->src.ip.sin_port)));
1459 called by a daemon to inform us of a TCP connection that one of its
1460 clients used are no longer needed in the tickle database
1462 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1464 struct ctdb_vnn *vnn;
1465 struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1467 /* If we don't have public IPs, tickles are useless */
1468 if (ctdb->vnn == NULL) {
1472 vnn = find_public_ip_vnn(ctdb, &conn->dst);
1475 (__location__ " unable to find public address %s\n",
1476 ctdb_addr_to_str(&conn->dst)));
1480 ctdb_remove_connection(vnn, conn);
1487 Called when another daemon starts - causes all tickles for all
1488 public addresses we are serving to be sent to the new node on the
1489 next check. This actually causes the next scheduled call to
1490 tdb_update_tcp_tickles() to update all nodes. This is simple and
1491 doesn't require careful error handling.
1493 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1495 struct ctdb_vnn *vnn;
1497 DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1498 (unsigned long) pnn));
1500 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1501 vnn->tcp_update_needed = true;
1509 called when a client structure goes away - hook to remove
1510 elements from the tcp_list in all daemons
1512 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1514 while (client->tcp_list) {
1515 struct ctdb_vnn *vnn;
1516 struct ctdb_tcp_list *tcp = client->tcp_list;
1517 struct ctdb_connection *conn = &tcp->connection;
1519 DLIST_REMOVE(client->tcp_list, tcp);
1521 vnn = find_public_ip_vnn(client->ctdb,
1525 (__location__ " unable to find public address %s\n",
1526 ctdb_addr_to_str(&conn->dst)));
1530 /* If the IP address is hosted on this node then
1531 * remove the connection. */
1532 if (vnn->pnn == client->ctdb->pnn) {
1533 ctdb_remove_connection(vnn, conn);
1536 /* Otherwise this function has been called because the
1537 * server IP address has been released to another node
1538 * and the client has exited. This means that we
1539 * should not delete the connection information. The
1540 * takeover node processes connections too. */
1545 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1547 struct ctdb_vnn *vnn, *next;
1550 if (ctdb->tunable.disable_ip_failover == 1) {
1554 for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1555 /* vnn can be freed below in release_ip_post() */
1558 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1559 ctdb_vnn_unassign_iface(ctdb, vnn);
1563 /* Don't allow multiple releases at once. Some code,
1564 * particularly ctdb_tickle_sentenced_connections() is
1566 if (vnn->update_in_flight) {
1567 DEBUG(DEBUG_WARNING,
1569 " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1570 ctdb_addr_to_str(&vnn->public_address),
1571 vnn->public_netmask_bits,
1572 ctdb_vnn_iface_string(vnn)));
1575 vnn->update_in_flight = true;
1577 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1578 ctdb_addr_to_str(&vnn->public_address),
1579 vnn->public_netmask_bits,
1580 ctdb_vnn_iface_string(vnn)));
1582 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1583 ctdb_vnn_iface_string(vnn),
1584 ctdb_addr_to_str(&vnn->public_address),
1585 vnn->public_netmask_bits);
1586 /* releaseip timeouts are converted to success, so to
1587 * detect failures just check if the IP address is
1590 if (ctdb_sys_have_ip(&vnn->public_address)) {
1593 " IP address %s not released\n",
1594 ctdb_addr_to_str(&vnn->public_address)));
1595 vnn->update_in_flight = false;
1599 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1601 vnn->update_in_flight = false;
1606 DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1611 get list of public IPs
1613 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1614 struct ctdb_req_control_old *c, TDB_DATA *outdata)
1617 struct ctdb_public_ip_list_old *ips;
1618 struct ctdb_vnn *vnn;
1619 bool only_available = false;
1621 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1622 only_available = true;
1625 /* count how many public ip structures we have */
1627 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1631 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1632 num*sizeof(struct ctdb_public_ip);
1633 ips = talloc_zero_size(outdata, len);
1634 CTDB_NO_MEMORY(ctdb, ips);
1637 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1638 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1641 ips->ips[i].pnn = vnn->pnn;
1642 ips->ips[i].addr = vnn->public_address;
1646 len = offsetof(struct ctdb_public_ip_list_old, ips) +
1647 i*sizeof(struct ctdb_public_ip);
1649 outdata->dsize = len;
1650 outdata->dptr = (uint8_t *)ips;
1656 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1657 struct ctdb_req_control_old *c,
1662 ctdb_sock_addr *addr;
1663 struct ctdb_public_ip_info_old *info;
1664 struct ctdb_vnn *vnn;
1665 struct vnn_interface *iface;
1667 addr = (ctdb_sock_addr *)indata.dptr;
1669 vnn = find_public_ip_vnn(ctdb, addr);
1671 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1672 "'%s'not a public address\n",
1673 ctdb_addr_to_str(addr)));
1677 /* count how many public ip structures we have */
1679 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1683 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1684 num*sizeof(struct ctdb_iface);
1685 info = talloc_zero_size(outdata, len);
1686 CTDB_NO_MEMORY(ctdb, info);
1688 info->ip.addr = vnn->public_address;
1689 info->ip.pnn = vnn->pnn;
1690 info->active_idx = 0xFFFFFFFF;
1693 for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1694 struct ctdb_interface *cur;
1697 if (vnn->iface == cur) {
1698 info->active_idx = i;
1700 strncpy(info->ifaces[i].name, cur->name,
1701 sizeof(info->ifaces[i].name));
1702 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1703 info->ifaces[i].link_state = cur->link_up;
1704 info->ifaces[i].references = cur->references;
1709 len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1710 i*sizeof(struct ctdb_iface);
1712 outdata->dsize = len;
1713 outdata->dptr = (uint8_t *)info;
1718 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1719 struct ctdb_req_control_old *c,
1723 struct ctdb_iface_list_old *ifaces;
1724 struct ctdb_interface *cur;
1726 /* count how many public ip structures we have */
1728 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1732 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1733 num*sizeof(struct ctdb_iface);
1734 ifaces = talloc_zero_size(outdata, len);
1735 CTDB_NO_MEMORY(ctdb, ifaces);
1738 for (cur=ctdb->ifaces;cur;cur=cur->next) {
1739 strncpy(ifaces->ifaces[i].name, cur->name,
1740 sizeof(ifaces->ifaces[i].name));
1741 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1742 ifaces->ifaces[i].link_state = cur->link_up;
1743 ifaces->ifaces[i].references = cur->references;
1747 len = offsetof(struct ctdb_iface_list_old, ifaces) +
1748 i*sizeof(struct ctdb_iface);
1750 outdata->dsize = len;
1751 outdata->dptr = (uint8_t *)ifaces;
1756 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1757 struct ctdb_req_control_old *c,
1760 struct ctdb_iface *info;
1761 struct ctdb_interface *iface;
1762 bool link_up = false;
1764 info = (struct ctdb_iface *)indata.dptr;
1766 if (info->name[CTDB_IFACE_SIZE] != '\0') {
1767 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1768 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1769 len, len, info->name));
1773 switch (info->link_state) {
1781 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1782 (unsigned int)info->link_state));
1786 if (info->references != 0) {
1787 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1788 (unsigned int)info->references));
1792 iface = ctdb_find_iface(ctdb, info->name);
1793 if (iface == NULL) {
1797 if (link_up == iface->link_up) {
1802 ("iface[%s] has changed it's link status %s => %s\n",
1804 iface->link_up?"up":"down",
1805 link_up?"up":"down"));
1807 iface->link_up = link_up;
1813 called by a daemon to inform us of the entire list of TCP tickles for
1814 a particular public address.
1815 this control should only be sent by the node that is currently serving
1816 that public address.
1818 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1820 struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1821 struct ctdb_tcp_array *tcparray;
1822 struct ctdb_vnn *vnn;
1824 /* We must at least have tickles.num or else we cant verify the size
1825 of the received data blob
1827 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1828 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1832 /* verify that the size of data matches what we expect */
1833 if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1834 + sizeof(struct ctdb_connection) * list->num) {
1835 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1839 DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1840 ctdb_addr_to_str(&list->addr)));
1842 vnn = find_public_ip_vnn(ctdb, &list->addr);
1844 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1845 ctdb_addr_to_str(&list->addr)));
1850 if (vnn->pnn == ctdb->pnn) {
1852 ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1853 ctdb_addr_to_str(&list->addr)));
1857 /* remove any old ticklelist we might have */
1858 talloc_free(vnn->tcp_array);
1859 vnn->tcp_array = NULL;
1861 tcparray = talloc(vnn, struct ctdb_tcp_array);
1862 CTDB_NO_MEMORY(ctdb, tcparray);
1864 tcparray->num = list->num;
1866 tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1867 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1869 memcpy(tcparray->connections, &list->connections[0],
1870 sizeof(struct ctdb_connection)*tcparray->num);
1872 /* We now have a new fresh tickle list array for this vnn */
1873 vnn->tcp_array = tcparray;
1879 called to return the full list of tickles for the puclic address associated
1880 with the provided vnn
1882 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1884 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1885 struct ctdb_tickle_list_old *list;
1886 struct ctdb_tcp_array *tcparray;
1888 struct ctdb_vnn *vnn;
1891 vnn = find_public_ip_vnn(ctdb, addr);
1893 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1894 ctdb_addr_to_str(addr)));
1899 port = ctdb_addr_to_port(addr);
1901 tcparray = vnn->tcp_array;
1903 if (tcparray != NULL) {
1905 /* All connections */
1906 num = tcparray->num;
1908 /* Count connections for port */
1909 for (i = 0; i < tcparray->num; i++) {
1910 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1917 outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1918 + sizeof(struct ctdb_connection) * num;
1920 outdata->dptr = talloc_size(outdata, outdata->dsize);
1921 CTDB_NO_MEMORY(ctdb, outdata->dptr);
1922 list = (struct ctdb_tickle_list_old *)outdata->dptr;
1932 for (i = 0; i < tcparray->num; i++) {
1934 port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1935 list->connections[num] = tcparray->connections[i];
1945 set the list of all tcp tickles for a public address
1947 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1948 ctdb_sock_addr *addr,
1949 struct ctdb_tcp_array *tcparray)
1953 struct ctdb_tickle_list_old *list;
1956 num = tcparray->num;
1961 data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1962 sizeof(struct ctdb_connection) * num;
1963 data.dptr = talloc_size(ctdb, data.dsize);
1964 CTDB_NO_MEMORY(ctdb, data.dptr);
1966 list = (struct ctdb_tickle_list_old *)data.dptr;
1970 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
1973 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
1974 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1975 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1977 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1981 talloc_free(data.dptr);
1986 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1989 struct ctdb_vnn *vnn;
1992 for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1993 /* we only send out updates for public addresses that
1996 if (ctdb->pnn != vnn->pnn) {
2000 /* We only send out the updates if we need to */
2001 if (!force && !vnn->tcp_update_needed) {
2005 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2006 &vnn->public_address,
2009 D_ERR("Failed to send the tickle update for ip %s\n",
2010 ctdb_addr_to_str(&vnn->public_address));
2011 vnn->tcp_update_needed = true;
2013 D_INFO("Sent tickle update for ip %s\n",
2014 ctdb_addr_to_str(&vnn->public_address));
2015 vnn->tcp_update_needed = false;
2022 perform tickle updates if required
2024 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2025 struct tevent_timer *te,
2026 struct timeval t, void *private_data)
2028 struct ctdb_context *ctdb = talloc_get_type(
2029 private_data, struct ctdb_context);
2031 ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2033 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2034 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2035 ctdb_update_tcp_tickles, ctdb);
2039 start periodic update of tcp tickles
2041 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2043 ctdb->tickle_update_context = talloc_new(ctdb);
2045 tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2046 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2047 ctdb_update_tcp_tickles, ctdb);
2053 struct control_gratious_arp {
2054 struct ctdb_context *ctdb;
2055 ctdb_sock_addr addr;
2061 send a control_gratuitous arp
2063 static void send_gratious_arp(struct tevent_context *ev,
2064 struct tevent_timer *te,
2065 struct timeval t, void *private_data)
2068 struct control_gratious_arp *arp = talloc_get_type(private_data,
2069 struct control_gratious_arp);
2071 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2073 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2074 arp->iface, strerror(errno)));
2079 if (arp->count == CTDB_ARP_REPEAT) {
2084 tevent_add_timer(arp->ctdb->ev, arp,
2085 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2086 send_gratious_arp, arp);
2093 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2095 struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2096 struct control_gratious_arp *arp;
2098 /* verify the size of indata */
2099 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2100 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2101 (unsigned)indata.dsize,
2102 (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2106 ( offsetof(struct ctdb_addr_info_old, iface)
2107 + gratious_arp->len ) ){
2109 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2110 "but should be %u bytes\n",
2111 (unsigned)indata.dsize,
2112 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2117 arp = talloc(ctdb, struct control_gratious_arp);
2118 CTDB_NO_MEMORY(ctdb, arp);
2121 arp->addr = gratious_arp->addr;
2122 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2123 CTDB_NO_MEMORY(ctdb, arp->iface);
2126 tevent_add_timer(arp->ctdb->ev, arp,
2127 timeval_zero(), send_gratious_arp, arp);
2132 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2134 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2137 /* verify the size of indata */
2138 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2139 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2143 ( offsetof(struct ctdb_addr_info_old, iface)
2146 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2147 "but should be %u bytes\n",
2148 (unsigned)indata.dsize,
2149 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2153 DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2155 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2158 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2165 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2167 struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2168 struct ctdb_vnn *vnn;
2170 /* verify the size of indata */
2171 if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2172 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2176 ( offsetof(struct ctdb_addr_info_old, iface)
2179 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2180 "but should be %u bytes\n",
2181 (unsigned)indata.dsize,
2182 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2186 DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2188 /* walk over all public addresses until we find a match */
2189 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2190 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2191 if (vnn->pnn == ctdb->pnn) {
2192 /* This IP is currently being hosted.
2193 * Defer the deletion until the next
2194 * takeover run. "ctdb reloadips" will
2195 * always cause a takeover run. "ctdb
2196 * delip" will now need an explicit
2197 * "ctdb ipreallocated" afterwards. */
2198 vnn->delete_pending = true;
2200 /* This IP is not hosted on the
2201 * current node so just delete it
2203 do_delete_ip(ctdb, vnn);
2210 DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2211 ctdb_addr_to_str(&pub->addr)));
/* State carried from ctdb_control_ipreallocated() to its callback */
struct ipreallocated_callback_state {
	/* the control request that is answered once the event is done */
	struct ctdb_req_control_old *c;
};
2220 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2221 int status, void *p)
2223 struct ipreallocated_callback_state *state =
2224 talloc_get_type(p, struct ipreallocated_callback_state);
2228 (" \"ipreallocated\" event script failed (status %d)\n",
2230 if (status == -ETIME) {
2231 ctdb_ban_self(ctdb);
2235 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2239 /* A control to run the ipreallocated event */
2240 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2241 struct ctdb_req_control_old *c,
2245 struct ipreallocated_callback_state *state;
2247 state = talloc(ctdb, struct ipreallocated_callback_state);
2248 CTDB_NO_MEMORY(ctdb, state);
2250 DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2252 ret = ctdb_event_script_callback(ctdb, state,
2253 ctdb_ipreallocated_callback, state,
2254 CTDB_EVENT_IPREALLOCATED,
2258 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2263 /* tell the control that we will be reply asynchronously */
2264 state->c = talloc_steal(state, c);
2265 *async_reply = true;
/* Bookkeeping for one in-progress "reloadips" child process */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;
	/* pending control request, answered from the destructor */
	struct ctdb_req_control_old *c;
	/* status sent in that reply */
	int status;
	/* pipe used by the child to report its result */
	int fd[2];
	/* pid of the child process */
	pid_t child;
	/* event watching the read end of the pipe */
	struct tevent_fd *fde;
};
2280 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2282 if (h == h->ctdb->reload_ips) {
2283 h->ctdb->reload_ips = NULL;
2286 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2289 ctdb_kill(h->ctdb, h->child, SIGKILL);
2293 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2294 struct tevent_timer *te,
2295 struct timeval t, void *private_data)
2297 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2302 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2303 struct tevent_fd *fde,
2304 uint16_t flags, void *private_data)
2306 struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2311 ret = sys_read(h->fd[0], &res, 1);
2312 if (ret < 1 || res != 0) {
2313 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2321 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2323 TALLOC_CTX *mem_ctx = talloc_new(NULL);
2324 struct ctdb_public_ip_list_old *ips;
2325 struct ctdb_vnn *vnn;
2326 struct client_async_data *async_data;
2327 struct timeval timeout;
2329 struct ctdb_client_control_state *state;
2333 CTDB_NO_MEMORY(ctdb, mem_ctx);
2335 /* Read IPs from local node */
2336 ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2337 CTDB_CURRENT_NODE, mem_ctx, &ips);
2340 ("Unable to fetch public IPs from local node\n"));
2341 talloc_free(mem_ctx);
2345 /* Read IPs file - this is safe since this is a child process */
2347 if (ctdb_set_public_addresses(ctdb, false) != 0) {
2348 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2349 talloc_free(mem_ctx);
2353 async_data = talloc_zero(mem_ctx, struct client_async_data);
2354 CTDB_NO_MEMORY(ctdb, async_data);
2356 /* Compare IPs between node and file for IPs to be deleted */
2357 for (i = 0; i < ips->num; i++) {
2359 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2360 if (ctdb_same_ip(&vnn->public_address,
2361 &ips->ips[i].addr)) {
2362 /* IP is still in file */
2368 /* Delete IP ips->ips[i] */
2369 struct ctdb_addr_info_old *pub;
2372 ("IP %s no longer configured, deleting it\n",
2373 ctdb_addr_to_str(&ips->ips[i].addr)));
2375 pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2376 CTDB_NO_MEMORY(ctdb, pub);
2378 pub->addr = ips->ips[i].addr;
2382 timeout = TAKEOVER_TIMEOUT();
2384 data.dsize = offsetof(struct ctdb_addr_info_old,
2386 data.dptr = (uint8_t *)pub;
2388 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2389 CTDB_CONTROL_DEL_PUBLIC_IP,
2390 0, data, async_data,
2392 if (state == NULL) {
2395 " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2399 ctdb_client_async_add(async_data, state);
2403 /* Compare IPs between node and file for IPs to be added */
2405 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2406 for (i = 0; i < ips->num; i++) {
2407 if (ctdb_same_ip(&vnn->public_address,
2408 &ips->ips[i].addr)) {
2409 /* IP already on node */
2413 if (i == ips->num) {
2414 /* Add IP ips->ips[i] */
2415 struct ctdb_addr_info_old *pub;
2416 const char *ifaces = NULL;
2418 struct vnn_interface *iface = NULL;
2421 ("New IP %s configured, adding it\n",
2422 ctdb_addr_to_str(&vnn->public_address)));
2424 uint32_t pnn = ctdb_get_pnn(ctdb);
2426 data.dsize = sizeof(pnn);
2427 data.dptr = (uint8_t *)&pnn;
2429 ret = ctdb_client_send_message(
2431 CTDB_BROADCAST_CONNECTED,
2432 CTDB_SRVID_REBALANCE_NODE,
2435 DEBUG(DEBUG_WARNING,
2436 ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2442 ifaces = vnn->ifaces->iface->name;
2443 iface = vnn->ifaces->next;
2444 while (iface != NULL) {
2445 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2446 iface->iface->name);
2447 iface = iface->next;
2450 len = strlen(ifaces) + 1;
2451 pub = talloc_zero_size(mem_ctx,
2452 offsetof(struct ctdb_addr_info_old, iface) + len);
2453 CTDB_NO_MEMORY(ctdb, pub);
2455 pub->addr = vnn->public_address;
2456 pub->mask = vnn->public_netmask_bits;
2458 memcpy(&pub->iface[0], ifaces, pub->len);
2460 timeout = TAKEOVER_TIMEOUT();
2462 data.dsize = offsetof(struct ctdb_addr_info_old,
2464 data.dptr = (uint8_t *)pub;
2466 state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2467 CTDB_CONTROL_ADD_PUBLIC_IP,
2468 0, data, async_data,
2470 if (state == NULL) {
2473 " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2477 ctdb_client_async_add(async_data, state);
2481 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2482 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2486 talloc_free(mem_ctx);
2490 talloc_free(mem_ctx);
2494 /* This control is sent to force the node to re-read the public addresses file
2495 and drop any addresses we should nnot longer host, and add new addresses
2496 that we are now able to host
2498 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2500 struct ctdb_reloadips_handle *h;
2501 pid_t parent = getpid();
2503 if (ctdb->reload_ips != NULL) {
2504 talloc_free(ctdb->reload_ips);
2505 ctdb->reload_ips = NULL;
2508 h = talloc(ctdb, struct ctdb_reloadips_handle);
2509 CTDB_NO_MEMORY(ctdb, h);
2514 if (pipe(h->fd) == -1) {
2515 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2520 h->child = ctdb_fork(ctdb);
2521 if (h->child == (pid_t)-1) {
2522 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2530 if (h->child == 0) {
2531 signed char res = 0;
2535 prctl_set_comment("ctdb_reloadips");
2536 if (switch_from_server_to_client(ctdb) != 0) {
2537 DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2540 res = ctdb_reloadips_child(ctdb);
2542 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2546 sys_write(h->fd[1], &res, 1);
2547 ctdb_wait_for_process_to_exit(parent);
2551 h->c = talloc_steal(h, c);
2554 set_close_on_exec(h->fd[0]);
2556 talloc_set_destructor(h, ctdb_reloadips_destructor);
2559 h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2560 ctdb_reloadips_child_handler, (void *)h);
2561 tevent_fd_set_auto_close(h->fde);
2563 tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2564 ctdb_reloadips_timeout_event, h);
2566 /* we reply later */
2567 *async_reply = true;