4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/wait.h"
28 #include "../include/ctdb_private.h"
29 #include "../common/rb_tree.h"
32 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
34 #define CTDB_ARP_INTERVAL 1
35 #define CTDB_ARP_REPEAT 3
38 struct ctdb_iface *prev, *next;
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
47 return vnn->iface->name;
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
57 /* Verify that we don't have an entry for this interface yet */
58 for (i=ctdb->ifaces;i;i=i->next) {
59 if (strcmp(i->name, iface) == 0) {
64 /* create a new structure for this interface */
65 i = talloc_zero(ctdb, struct ctdb_iface);
66 CTDB_NO_MEMORY_FATAL(ctdb, i);
67 i->name = talloc_strdup(i, iface);
68 CTDB_NO_MEMORY(ctdb, i->name);
71 DLIST_ADD(ctdb->ifaces, i);
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
81 /* Look for an existing entry with this interface name */
82 for (i=ctdb->ifaces;i;i=i->next) {
83 if (strcmp(i->name, iface) == 0) {
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
95 struct ctdb_iface *cur = NULL;
96 struct ctdb_iface *best = NULL;
98 for (i=0; vnn->ifaces[i]; i++) {
100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
114 if (cur->references < best->references) {
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124 struct ctdb_vnn *vnn)
126 struct ctdb_iface *best = NULL;
129 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130 "still assigned to iface '%s'\n",
131 ctdb_addr_to_str(&vnn->public_address),
132 ctdb_vnn_iface_string(vnn)));
136 best = ctdb_vnn_best_iface(ctdb, vnn);
138 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139 "cannot assign to iface any iface\n",
140 ctdb_addr_to_str(&vnn->public_address)));
146 vnn->pnn = ctdb->pnn;
148 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149 "now assigned to iface '%s' refs[%d]\n",
150 ctdb_addr_to_str(&vnn->public_address),
151 ctdb_vnn_iface_string(vnn),
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157 struct ctdb_vnn *vnn)
159 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160 "now unassigned (old iface '%s' refs[%d])\n",
161 ctdb_addr_to_str(&vnn->public_address),
162 ctdb_vnn_iface_string(vnn),
163 vnn->iface?vnn->iface->references:0));
165 vnn->iface->references--;
168 if (vnn->pnn == ctdb->pnn) {
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174 struct ctdb_vnn *vnn)
178 if (vnn->iface && vnn->iface->link_up) {
182 for (i=0; vnn->ifaces[i]; i++) {
183 struct ctdb_iface *cur;
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
198 struct ctdb_takeover_arp {
199 struct ctdb_context *ctdb;
202 struct ctdb_tcp_array *tcparray;
203 struct ctdb_vnn *vnn;
208 lists of tcp endpoints
210 struct ctdb_tcp_list {
211 struct ctdb_tcp_list *prev, *next;
212 struct ctdb_tcp_connection connection;
216 list of clients to kill on IP release
218 struct ctdb_client_ip {
219 struct ctdb_client_ip *prev, *next;
220 struct ctdb_context *ctdb;
227 send a gratuitous arp
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
230 struct timeval t, void *private_data)
232 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
233 struct ctdb_takeover_arp);
235 struct ctdb_tcp_array *tcparray;
236 const char *iface = ctdb_vnn_iface_string(arp->vnn);
238 ret = ctdb_sys_send_arp(&arp->addr, iface);
240 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241 iface, strerror(errno)));
244 tcparray = arp->tcparray;
246 for (i=0;i<tcparray->num;i++) {
247 struct ctdb_tcp_connection *tcon;
249 tcon = &tcparray->connections[i];
250 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
252 ctdb_addr_to_str(&tcon->src_addr),
253 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254 ret = ctdb_sys_send_tcp(
259 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260 ctdb_addr_to_str(&tcon->src_addr)));
267 if (arp->count == CTDB_ARP_REPEAT) {
272 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
273 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
274 ctdb_control_send_arp, arp);
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278 struct ctdb_vnn *vnn)
280 struct ctdb_takeover_arp *arp;
281 struct ctdb_tcp_array *tcparray;
283 if (!vnn->takeover_ctx) {
284 vnn->takeover_ctx = talloc_new(vnn);
285 if (!vnn->takeover_ctx) {
290 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
296 arp->addr = vnn->public_address;
299 tcparray = vnn->tcp_array;
301 /* add all of the known tcp connections for this IP to the
302 list of tcp connections to send tickle acks for */
303 arp->tcparray = talloc_steal(arp, tcparray);
305 vnn->tcp_array = NULL;
306 vnn->tcp_update_needed = true;
309 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310 timeval_zero(), ctdb_control_send_arp, arp);
315 struct takeover_callback_state {
316 struct ctdb_req_control *c;
317 ctdb_sock_addr *addr;
318 struct ctdb_vnn *vnn;
321 struct ctdb_do_takeip_state {
322 struct ctdb_req_control *c;
323 struct ctdb_vnn *vnn;
327 called when takeip event finishes
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
332 struct ctdb_do_takeip_state *state =
333 talloc_get_type(private_data, struct ctdb_do_takeip_state);
338 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
340 if (status == -ETIME) {
343 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
344 ctdb_addr_to_str(&state->vnn->public_address),
345 ctdb_vnn_iface_string(state->vnn)));
346 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 node->flags |= NODE_FLAGS_UNHEALTHY;
353 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
355 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
360 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
361 data.dsize = strlen((char *)data.dptr) + 1;
362 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
364 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
367 /* the control succeeded */
368 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
374 take over an ip address
376 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
377 struct ctdb_req_control *c,
378 struct ctdb_vnn *vnn)
381 struct ctdb_do_takeip_state *state;
383 ret = ctdb_vnn_assign_iface(ctdb, vnn);
385 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
386 "assin a usable interface\n",
387 ctdb_addr_to_str(&vnn->public_address),
388 vnn->public_netmask_bits));
392 state = talloc(vnn, struct ctdb_do_takeip_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(ctdb, c);
398 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
399 ctdb_addr_to_str(&vnn->public_address),
400 vnn->public_netmask_bits,
401 ctdb_vnn_iface_string(vnn)));
403 ret = ctdb_event_script_callback(ctdb,
405 ctdb_do_takeip_callback,
410 ctdb_vnn_iface_string(vnn),
411 ctdb_addr_to_str(&vnn->public_address),
412 vnn->public_netmask_bits);
415 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
416 ctdb_addr_to_str(&vnn->public_address),
417 ctdb_vnn_iface_string(vnn)));
425 struct ctdb_do_updateip_state {
426 struct ctdb_req_control *c;
427 struct ctdb_iface *old;
428 struct ctdb_vnn *vnn;
432 called when updateip event finishes
434 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
437 struct ctdb_do_updateip_state *state =
438 talloc_get_type(private_data, struct ctdb_do_updateip_state);
442 if (status == -ETIME) {
445 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
446 ctdb_addr_to_str(&state->vnn->public_address),
448 ctdb_vnn_iface_string(state->vnn)));
451 * All we can do is reset the old interface
452 * and let the next run fix it
454 ctdb_vnn_unassign_iface(ctdb, state->vnn);
455 state->vnn->iface = state->old;
456 state->vnn->iface->references++;
458 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
463 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
465 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
470 /* the control succeeded */
471 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
477 update (move) an ip address
479 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
480 struct ctdb_req_control *c,
481 struct ctdb_vnn *vnn)
484 struct ctdb_do_updateip_state *state;
485 struct ctdb_iface *old = vnn->iface;
488 ctdb_vnn_unassign_iface(ctdb, vnn);
489 ret = ctdb_vnn_assign_iface(ctdb, vnn);
491 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
492 "assin a usable interface (old iface '%s')\n",
493 ctdb_addr_to_str(&vnn->public_address),
494 vnn->public_netmask_bits,
499 new_name = ctdb_vnn_iface_string(vnn);
500 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
501 /* A benign update from one interface onto itself.
502 * no need to run the eventscripts in this case, just return
505 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
509 state = talloc(vnn, struct ctdb_do_updateip_state);
510 CTDB_NO_MEMORY(ctdb, state);
512 state->c = talloc_steal(ctdb, c);
516 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
517 "interface %s to %s\n",
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits,
523 ret = ctdb_event_script_callback(ctdb,
525 ctdb_do_updateip_callback,
528 CTDB_EVENT_UPDATE_IP,
532 ctdb_addr_to_str(&vnn->public_address),
533 vnn->public_netmask_bits);
535 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
536 ctdb_addr_to_str(&vnn->public_address),
537 old->name, new_name));
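546 Find the vnn whose public address matches the given ip address.
547 The return type is a pointer, so an address that is not known as a public address cannot be reported as -1 (presumably NULL is returned -- TODO confirm)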
549 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
551 struct ctdb_vnn *vnn;
553 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
554 if (ctdb_same_ip(&vnn->public_address, addr)) {
563 take over an ip address
565 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
566 struct ctdb_req_control *c,
571 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
572 struct ctdb_vnn *vnn;
573 bool have_ip = false;
574 bool do_updateip = false;
575 bool do_takeip = false;
576 struct ctdb_iface *best_iface = NULL;
578 if (pip->pnn != ctdb->pnn) {
579 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
580 "with pnn %d, but we're node %d\n",
581 ctdb_addr_to_str(&pip->addr),
582 pip->pnn, ctdb->pnn));
586 /* update our vnn list */
587 vnn = find_public_ip_vnn(ctdb, &pip->addr);
589 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
590 ctdb_addr_to_str(&pip->addr)));
594 have_ip = ctdb_sys_have_ip(&pip->addr);
595 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
596 if (best_iface == NULL) {
597 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
598 "a usable interface (old %s, have_ip %d)\n",
599 ctdb_addr_to_str(&vnn->public_address),
600 vnn->public_netmask_bits,
601 ctdb_vnn_iface_string(vnn),
606 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
607 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
611 if (vnn->iface == NULL && have_ip) {
612 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
613 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
614 ctdb_addr_to_str(&vnn->public_address)));
618 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
619 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
620 "and we have it on iface[%s], but it was assigned to node %d"
621 "and we are node %d, banning ourself\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
628 if (vnn->pnn == -1 && have_ip) {
629 vnn->pnn = ctdb->pnn;
630 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
631 "and we already have it on iface[%s], update local daemon\n",
632 ctdb_addr_to_str(&vnn->public_address),
633 ctdb_vnn_iface_string(vnn)));
638 if (vnn->iface->link_up) {
639 /* only move when the rebalance gains something */
640 if (vnn->iface->references > (best_iface->references + 1)) {
643 } else if (vnn->iface != best_iface) {
650 ctdb_vnn_unassign_iface(ctdb, vnn);
657 ret = ctdb_do_takeip(ctdb, c, vnn);
661 } else if (do_updateip) {
662 ret = ctdb_do_updateip(ctdb, c, vnn);
668 * The interface is up and the kernel knows the ip
671 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
672 ctdb_addr_to_str(&pip->addr),
673 vnn->public_netmask_bits,
674 ctdb_vnn_iface_string(vnn)));
678 /* tell ctdb_control.c that we will be replying asynchronously */
685 takeover an ip address old v4 style
687 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
688 struct ctdb_req_control *c,
694 data.dsize = sizeof(struct ctdb_public_ip);
695 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
696 CTDB_NO_MEMORY(ctdb, data.dptr);
698 memcpy(data.dptr, indata.dptr, indata.dsize);
699 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
703 kill any clients that are registered with an IP that is being released
705 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
707 struct ctdb_client_ip *ip;
709 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
710 ctdb_addr_to_str(addr)));
712 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
713 ctdb_sock_addr tmp_addr;
716 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
718 ctdb_addr_to_str(&ip->addr)));
720 if (ctdb_same_ip(&tmp_addr, addr)) {
721 struct ctdb_client *client = ctdb_reqid_find(ctdb,
724 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
726 ctdb_addr_to_str(&ip->addr),
729 if (client->pid != 0) {
730 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
731 (unsigned)client->pid,
732 ctdb_addr_to_str(addr),
734 kill(client->pid, SIGKILL);
741 called when releaseip event finishes
743 static void release_ip_callback(struct ctdb_context *ctdb, int status,
746 struct takeover_callback_state *state =
747 talloc_get_type(private_data, struct takeover_callback_state);
750 if (status == -ETIME) {
754 /* send a message to all clients of this node telling them
755 that the cluster has been reconfigured and they should
756 release any sockets on this IP */
757 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
758 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
759 data.dsize = strlen((char *)data.dptr)+1;
761 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
763 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
765 /* kill clients that have registered with this IP */
766 release_kill_clients(ctdb, state->addr);
768 ctdb_vnn_unassign_iface(ctdb, state->vnn);
770 /* the control succeeded */
771 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
776 release an ip address
778 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
779 struct ctdb_req_control *c,
784 struct takeover_callback_state *state;
785 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
786 struct ctdb_vnn *vnn;
788 /* update our vnn list */
789 vnn = find_public_ip_vnn(ctdb, &pip->addr);
791 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
792 ctdb_addr_to_str(&pip->addr)));
797 /* stop any previous arps */
798 talloc_free(vnn->takeover_ctx);
799 vnn->takeover_ctx = NULL;
801 if (!ctdb_sys_have_ip(&pip->addr)) {
802 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
806 ctdb_vnn_unassign_iface(ctdb, vnn);
810 if (vnn->iface == NULL) {
811 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
812 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
813 ctdb_addr_to_str(&vnn->public_address)));
817 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn),
823 state = talloc(ctdb, struct takeover_callback_state);
824 CTDB_NO_MEMORY(ctdb, state);
826 state->c = talloc_steal(state, c);
827 state->addr = talloc(state, ctdb_sock_addr);
828 CTDB_NO_MEMORY(ctdb, state->addr);
829 *state->addr = pip->addr;
832 ret = ctdb_event_script_callback(ctdb,
833 state, release_ip_callback, state,
835 CTDB_EVENT_RELEASE_IP,
837 ctdb_vnn_iface_string(vnn),
838 ctdb_addr_to_str(&pip->addr),
839 vnn->public_netmask_bits);
841 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
842 ctdb_addr_to_str(&pip->addr),
843 ctdb_vnn_iface_string(vnn)));
848 /* tell the control that we will be replying asynchronously */
854 release an ip address old v4 style
856 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
857 struct ctdb_req_control *c,
863 data.dsize = sizeof(struct ctdb_public_ip);
864 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
865 CTDB_NO_MEMORY(ctdb, data.dptr);
867 memcpy(data.dptr, indata.dptr, indata.dsize);
868 return ctdb_control_release_ip(ctdb, c, data, async_reply);
872 static int ctdb_add_public_address(struct ctdb_context *ctdb,
873 ctdb_sock_addr *addr,
874 unsigned mask, const char *ifaces)
876 struct ctdb_vnn *vnn;
883 /* Verify that we don't have an entry for this ip yet */
884 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
885 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
886 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
887 ctdb_addr_to_str(addr)));
892 /* create a new vnn structure for this ip address */
893 vnn = talloc_zero(ctdb, struct ctdb_vnn);
894 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
895 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
896 tmp = talloc_strdup(vnn, ifaces);
897 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
898 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
899 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
900 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
901 vnn->ifaces[num] = talloc_strdup(vnn, iface);
902 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
906 vnn->ifaces[num] = NULL;
907 vnn->public_address = *addr;
908 vnn->public_netmask_bits = mask;
910 if (ctdb_sys_have_ip(addr)) {
911 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
912 vnn->pnn = ctdb->pnn;
915 for (i=0; vnn->ifaces[i]; i++) {
916 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
918 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
919 "for public_address[%s]\n",
920 vnn->ifaces[i], ctdb_addr_to_str(addr)));
925 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
929 DLIST_ADD(ctdb->vnn, vnn);
935 setup the event script directory
937 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
939 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
940 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
945 setup the public address lists from a file
947 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
953 lines = file_lines_load(alist, &nlines, ctdb);
955 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
958 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
962 for (i=0;i<nlines;i++) {
970 while ((*line == ' ') || (*line == '\t')) {
976 if (strcmp(line, "") == 0) {
979 tok = strtok(line, " \t");
981 tok = strtok(NULL, " \t");
983 if (NULL == ctdb->default_public_interface) {
984 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
989 ifaces = ctdb->default_public_interface;
994 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
995 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
999 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
1000 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1010 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1014 struct ctdb_vnn *svnn;
1015 struct ctdb_iface *cur = NULL;
1019 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1020 CTDB_NO_MEMORY(ctdb, svnn);
1022 svnn->ifaces = talloc_array(svnn, const char *, 2);
1023 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1024 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1025 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1026 svnn->ifaces[1] = NULL;
1028 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1034 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1036 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1037 "for single_ip[%s]\n",
1039 ctdb_addr_to_str(&svnn->public_address)));
1044 /* assume the single public ip interface is initially "good" */
1045 cur = ctdb_find_iface(ctdb, iface);
1047 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1050 cur->link_up = true;
1052 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1058 ctdb->single_ip_vnn = svnn;
1062 struct ctdb_public_ip_list {
1063 struct ctdb_public_ip_list *next;
1065 ctdb_sock_addr addr;
1069 /* Given a physical node, return the number of
1070 public addresses that are currently assigned to this node.
1072 static int node_ip_coverage(struct ctdb_context *ctdb,
1074 struct ctdb_public_ip_list *ips)
1078 for (;ips;ips=ips->next) {
1079 if (ips->pnn == pnn) {
1087 /* Check if this is a public ip known to the node, i.e. can that
1088 node take over this ip?
1090 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1091 struct ctdb_public_ip_list *ip)
1093 struct ctdb_all_public_ips *public_ips;
1096 public_ips = ctdb->nodes[pnn]->available_public_ips;
1098 if (public_ips == NULL) {
1102 for (i=0;i<public_ips->num;i++) {
1103 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1104 /* yes, this node can serve this public ip */
1113 /* search the node list for a node to take over this ip.
1114 pick the node that is currently serving the least number of ips
1115 so that the ips get spread out evenly.
1117 static int find_takeover_node(struct ctdb_context *ctdb,
1118 struct ctdb_node_map *nodemap, uint32_t mask,
1119 struct ctdb_public_ip_list *ip,
1120 struct ctdb_public_ip_list *all_ips)
1122 int pnn, min=0, num;
1126 for (i=0;i<nodemap->num;i++) {
1127 if (nodemap->nodes[i].flags & mask) {
1128 /* This node is not healthy and can not be used to serve
1134 /* verify that this node can serve this ip */
1135 if (can_node_serve_ip(ctdb, i, ip)) {
1136 /* no, it couldn't, so skip to the next node */
1140 num = node_ip_coverage(ctdb, i, all_ips);
1141 /* was this the first node we checked ? */
1153 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1154 ctdb_addr_to_str(&ip->addr)));
1164 static uint32_t *ip_key(ctdb_sock_addr *ip)
1166 static uint32_t key[IP_KEYLEN];
1168 bzero(key, sizeof(key));
1170 switch (ip->sa.sa_family) {
1172 key[3] = htonl(ip->ip.sin_addr.s_addr);
1175 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1176 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1177 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1178 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1181 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1188 static void *add_ip_callback(void *parm, void *data)
1190 struct ctdb_public_ip_list *this_ip = parm;
1191 struct ctdb_public_ip_list *prev_ip = data;
1193 if (prev_ip == NULL) {
1196 if (this_ip->pnn == -1) {
1197 this_ip->pnn = prev_ip->pnn;
1203 void getips_count_callback(void *param, void *data)
1205 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1206 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1208 new_ip->next = *ip_list;
1212 static struct ctdb_public_ip_list *
1213 create_merged_ip_list(struct ctdb_context *ctdb)
1216 struct ctdb_public_ip_list *ip_list;
1217 struct ctdb_all_public_ips *public_ips;
1219 if (ctdb->ip_tree != NULL) {
1220 talloc_free(ctdb->ip_tree);
1221 ctdb->ip_tree = NULL;
1223 ctdb->ip_tree = trbt_create(ctdb, 0);
1225 for (i=0;i<ctdb->num_nodes;i++) {
1226 public_ips = ctdb->nodes[i]->known_public_ips;
1228 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1232 /* there were no public ips for this node */
1233 if (public_ips == NULL) {
1237 for (j=0;j<public_ips->num;j++) {
1238 struct ctdb_public_ip_list *tmp_ip;
1240 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1241 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1242 tmp_ip->pnn = public_ips->ips[j].pnn;
1243 tmp_ip->addr = public_ips->ips[j].addr;
1244 tmp_ip->next = NULL;
1246 trbt_insertarray32_callback(ctdb->ip_tree,
1247 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1254 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1260 * This is the length of the longest common prefix between the IPs.
1261 * It is calculated by XOR-ing the 2 IPs together and counting the
1262 * number of leading zeroes. The implementation means that all
1263 * addresses end up being 128 bits long.
1264 * Not static, so we can easily link it into a unit test.
1266 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1267 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1268 * lots of nodes and IP addresses?
1270 uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1272 uint32_t ip1_k[IP_KEYLEN];
1277 uint32_t distance = 0;
1279 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1281 for (i=0; i<IP_KEYLEN; i++) {
1282 x = ip1_k[i] ^ t[i];
1286 /* Count number of leading zeroes.
1287 * FIXME? This could be optimised...
1289 while ((x & (1 << 31)) == 0) {
1299 /* Calculate the IP distance for the given IP relative to IPs on the
1300 given node. The ips argument is generally the all_ips variable
1301 used in the main part of the algorithm.
1302 * Not static, so we can easily link it into a unit test.
1304 uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1305 struct ctdb_public_ip_list *ips,
1308 struct ctdb_public_ip_list *t;
1313 for (t=ips; t != NULL; t=t->next) {
1314 if (t->pnn != pnn) {
1318 /* Optimisation: We never calculate the distance
1319 * between an address and itself. This allows us to
1320 * calculate the effect of removing an address from a
1321 * node by simply calculating the distance between
1322 * that address and all of the existing addresses.
1323 * Moreover, we assume that we're only ever dealing
1324 * with addresses from all_ips so we can identify an
1325 * address via a pointer rather than doing a more
1326 * expensive address comparison. */
1327 if (&(t->addr) == ip) {
1331 d = ip_distance(ip, &(t->addr));
1332 sum += d * d; /* Cheaper than pulling in math.h :-) */
1338 /* Return the LCP2 imbalance metric for addresses currently assigned
1340 * Not static, so we can easily link it into a unit test.
1342 uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1344 struct ctdb_public_ip_list *t;
1346 uint32_t imbalance = 0;
1348 for (t=all_ips; t!=NULL; t=t->next) {
1349 if (t->pnn != pnn) {
1352 /* Pass the rest of the IPs rather than the whole
1355 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
1361 /* Allocate any unassigned IPs just by looping through the IPs and
1362 * finding the best node for each.
1363 * Not static, so we can easily link it into a unit test.
1365 void basic_allocate_unassigned(struct ctdb_context *ctdb,
1366 struct ctdb_node_map *nodemap,
1368 struct ctdb_public_ip_list *all_ips)
1370 struct ctdb_public_ip_list *tmp_ip;
1372 /* loop over all ip's and find a physical node to cover for
1375 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1376 if (tmp_ip->pnn == -1) {
1377 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1378 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1379 ctdb_addr_to_str(&tmp_ip->addr)));
1385 /* Basic non-deterministic rebalancing algorithm.
1386 * Not static, so we can easily link it into a unit test.
1388 bool basic_failback(struct ctdb_context *ctdb,
1389 struct ctdb_node_map *nodemap,
1391 struct ctdb_public_ip_list *all_ips,
1396 int maxnode, maxnum=0, minnode, minnum=0, num;
1397 struct ctdb_public_ip_list *tmp_ip;
1399 /* for each ip address, loop over all nodes that can serve
1400 this ip and make sure that the difference between the node
1401 serving the most and the node serving the least ip's are
1404 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1405 if (tmp_ip->pnn == -1) {
1409 /* Get the highest and lowest number of ips served by any
1410 valid node which can serve this ip.
1414 for (i=0;i<nodemap->num;i++) {
1415 if (nodemap->nodes[i].flags & mask) {
1419 /* only check nodes that can actually serve this ip */
1420 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1421 /* no, it couldn't, so skip to the next node */
1425 num = node_ip_coverage(ctdb, i, all_ips);
1426 if (maxnode == -1) {
1435 if (minnode == -1) {
1445 if (maxnode == -1) {
1446 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1447 ctdb_addr_to_str(&tmp_ip->addr)));
1452 /* If we want deterministic IPs then don't try to reallocate
1453 them to spread out the load.
1455 if (1 == ctdb->tunable.deterministic_public_ips) {
1459 /* if the spread between the smallest and largest coverage by
1460 a node is >=2 we steal one of the ips from the node with
1461 most coverage to even things out a bit.
1462 try to do this a limited number of times since we don't
1463 want to spend too much time balancing the ip coverage.
1465 if ( (maxnum > minnum+1)
1466 && (*retries < (num_ips + 5)) ){
1467 struct ctdb_public_ip_list *tmp;
1469 /* mark one of maxnode's vnn's as unassigned and try
1472 for (tmp=all_ips;tmp;tmp=tmp->next) {
1473 if (tmp->pnn == maxnode) {
1485 /* Do necessary LCP2 initialisation. Bury it in a function here so
1486 * that we can unit test it.
1487 * Not static, so we can easily link it into a unit test.
1489 void lcp2_init(struct ctdb_context * tmp_ctx,
1490 struct ctdb_node_map * nodemap,
1492 struct ctdb_public_ip_list *all_ips,
1493 uint32_t **lcp2_imbalances,
1494 bool **newly_healthy)
1497 struct ctdb_public_ip_list *tmp_ip;
1499 *newly_healthy = talloc_array(tmp_ctx, bool, nodemap->num);
1500 CTDB_NO_MEMORY_FATAL(tmp_ctx, *newly_healthy);
1501 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1502 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1504 for (i=0;i<nodemap->num;i++) {
1505 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1506 /* First step: is the node "healthy"? */
1507 (*newly_healthy)[i] = ! (bool)(nodemap->nodes[i].flags & mask);
1510 /* 2nd step: if a node has IPs assigned then it must have been
1511 * healthy before, so we remove it from consideration... */
1512 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1513 if (tmp_ip->pnn != -1) {
1514 (*newly_healthy)[tmp_ip->pnn] = false;
// lcp2_allocate_unassigned: hands out addresses whose pnn is -1, one per
// pass of the outer loop, for as long as have_unassigned and should_loop
// both hold.
// For every unassigned address and candidate node the cost is
// dstdsum = ip_distance_2_sum(address, all_ips, node). A candidate becomes
// the current best when it is the first one seen or its dstdsum is lower
// than mindsum.
// lcp2_imbalances is indexed by node number and is updated in place for the
// node that receives an address.
// NOTE(review): lines omitted from this listing (the mask parameter, the
// declarations of dstnode and minnode, the bookkeeping inside the best
// candidate branch, closing braces) are not described here.
1519 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1520 * the IP/node combination that will cost the least.
1521 * Not static, so we can easily link it into a unit test.
1523 void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1524 struct ctdb_node_map *nodemap,
1526 struct ctdb_public_ip_list *all_ips,
1527 uint32_t *lcp2_imbalances)
1529 struct ctdb_public_ip_list *tmp_ip;
1533 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1534 struct ctdb_public_ip_list *minip;
1536 bool should_loop = true;
1537 bool have_unassigned = true;
// should_loop is cleared at the top of every pass; presumably one of the
// omitted lines sets it again when a candidate is found - TODO confirm.
1539 while (have_unassigned && should_loop) {
1540 should_loop = false;
1542 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1543 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1549 /* loop over each unassigned ip. */
1550 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1551 if (tmp_ip->pnn != -1) {
1555 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1556 /* only check nodes that can actually serve this ip */
1557 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1558 /* no it couldnt so skip to the next node */
// Nodes with any of the mask flags set are tested here as well.
1561 if (nodemap->nodes[dstnode].flags & mask) {
// dstimbl is the imbalance the node would have after taking this address.
1565 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1566 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1567 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1568 ctdb_addr_to_str(&(tmp_ip->addr)),
1570 dstimbl - lcp2_imbalances[dstnode]));
1573 if ((minnode == -1) || (dstdsum < mindsum)) {
1583 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1585 /* If we found one then assign it to the given node. */
1586 if (minnode != -1) {
1587 minip->pnn = minnode;
1588 lcp2_imbalances[minnode] = minimbl;
1589 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1590 ctdb_addr_to_str(&(minip->addr)),
1595 /* There might be a better way but at least this is clear. */
1596 have_unassigned = false;
1597 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1598 if (tmp_ip->pnn == -1) {
1599 have_unassigned = true;
1604 /* We know if we have an unassigned addresses so we might as
// Anything still unassigned at this point only produces a warning.
1607 if (have_unassigned) {
1608 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1609 if (tmp_ip->pnn == -1) {
1610 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1611 ctdb_addr_to_str(&tmp_ip->addr)));
// lcp2_failback: tries to find one address move from the most imbalanced
// node to a newly healthy node, and applies it when one is found.
// Parameters visible here: nodemap supplies the node count, all_ips is the
// merged address list, lcp2_imbalances and newly_healthy are per-node arrays
// indexed by node number.
// NOTE(review): return statements, several initialisations and the mask
// parameter are among the lines omitted from this listing, so the return
// value for each path is not documented here.
1617 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1618 * node with the highest LCP2 imbalance, and then determines the best
1619 * IP/destination node combination to move from the source node.
1621 * Not static, so we can easily link it into a unit test.
1623 bool lcp2_failback(struct ctdb_context *ctdb,
1624 struct ctdb_node_map *nodemap,
1626 struct ctdb_public_ip_list *all_ips,
1627 uint32_t *lcp2_imbalances,
1628 bool *newly_healthy)
1630 int srcnode, dstnode, mindstnode, i, num_newly_healthy;
1631 uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
1632 uint32_t minsrcimbl, mindstimbl, b;
1633 struct ctdb_public_ip_list *minip;
1634 struct ctdb_public_ip_list *tmp_ip;
1636 /* It is only worth continuing if we have suitable target
1637 * nodes to transfer IPs to. This check is much cheaper than
1640 num_newly_healthy = 0;
1641 for (i = 0; i < nodemap->num; i++) {
1642 if (newly_healthy[i]) {
1643 num_newly_healthy++;
1646 if (num_newly_healthy == 0) {
1650 /* Get the node with the highest imbalance metric. */
1653 for (i=0; i < nodemap->num; i++) {
1654 b = lcp2_imbalances[i];
1655 if ((srcnode == -1) || (b > maximbl)) {
1661 /* This means that all nodes had 0 or 1 addresses, so can't be
1668 /* Find an IP and destination node that best reduces imbalance. */
1674 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1675 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));
1677 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1678 /* Only consider addresses on srcnode. */
1679 if (tmp_ip->pnn != srcnode) {
1683 /* What is this IP address costing the source node? */
// srcimbl is the imbalance srcnode would have without this address.
1684 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1685 srcimbl = maximbl - srcdsum;
1687 /* Consider this IP address would cost each potential
1688 * destination node. Destination nodes are limited to
1689 * those that are newly healthy, since we don't want
1690 * to do gratuitous failover of IPs just to make minor
1691 * balance improvements.
1693 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1694 if (! newly_healthy[dstnode]) {
1697 /* only check nodes that can actually serve this ip */
1698 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1699 /* no it couldnt so skip to the next node */
1703 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1704 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1705 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1706 srcnode, srcimbl - lcp2_imbalances[srcnode],
1707 ctdb_addr_to_str(&(tmp_ip->addr)),
1708 dstnode, dstimbl - lcp2_imbalances[dstnode]));
// A move qualifies only if the destination ends up below maximbl and the
// address costs the destination less than it costs the source; among
// qualifying moves the lowest combined imbalance wins.
1710 if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \
1711 ((mindstnode == -1) || \
1712 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1715 minsrcimbl = srcimbl;
1716 mindstnode = dstnode;
1717 mindstimbl = dstimbl;
1721 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1723 if (mindstnode != -1) {
1724 /* We found a move that makes things better... */
1725 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1726 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1727 ctdb_addr_to_str(&(minip->addr)),
1728 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
// NOTE(review): the value stored for srcnode is srcimbl, which at this point
// holds the figure computed for the last srcnode address visited by the loop
// above. The figure recorded for the chosen move is minsrcimbl (it is what
// the log line just above prints). This looks like it should store
// minsrcimbl - confirm and fix.
1731 lcp2_imbalances[srcnode] = srcimbl;
1732 lcp2_imbalances[mindstnode] = mindstimbl;
1733 minip->pnn = mindstnode;
// ctdb_takeover_run_core: decides which node should own each public address.
// The merged address list is built with create_merged_ip_list and is handed
// back to the caller through all_ips_p; each entry ends up with pnn set to
// the owning node or to -1.
// The algorithm is selected by tunables: deterministic_public_ips resets
// every assignment up front, lcp2_public_ip_assignment selects the lcp2_*
// helpers instead of the basic_* ones, and no_ip_failback guards the
// rebalancing step.
// NOTE(review): many original lines (counters, goto or return statements,
// closing braces, the declaration of mask) are omitted from this listing.
1742 /* The calculation part of the IP allocation algorithm.
1743 * Not static, so we can easily link it into a unit test.
1745 void ctdb_takeover_run_core(struct ctdb_context *ctdb,
1746 struct ctdb_node_map *nodemap,
1747 struct ctdb_public_ip_list **all_ips_p)
1749 int i, num_healthy, retries, num_ips;
1751 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1752 uint32_t *lcp2_imbalances;
1753 bool *newly_healthy;
// Scratch context; it is passed to lcp2_init as the parent of its arrays.
1755 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1757 /* Count how many completely healthy nodes we have */
1759 for (i=0;i<nodemap->num;i++) {
1760 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
// mask holds the node flags that exclude a node from serving addresses.
1765 if (num_healthy > 0) {
1766 /* We have healthy nodes, so only consider them for
1767 serving public addresses
1769 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1771 /* We didnt have any completely healthy nodes so
1772 use "disabled" nodes as a fallback
1774 mask = NODE_FLAGS_INACTIVE;
1777 /* since nodes only know about those public addresses that
1778 can be served by that particular node, no single node has
1779 a full list of all public addresses that exist in the cluster.
1780 Walk over all node structures and create a merged list of
1781 all public addresses that exist in the cluster.
1783 keep the tree of ips around as ctdb->ip_tree
1785 all_ips = create_merged_ip_list(ctdb);
1786 *all_ips_p = all_ips; /* minimal code changes */
1788 /* Count how many ips we have */
1790 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1794 /* If we want deterministic ip allocations, i.e. that the ip addresses
1795 will always be allocated the same way for a specific set of
1796 available/unavailable nodes.
// Round-robin reset by list position.
// NOTE(review): the modulo assumes nodemap->num is non-zero - confirm.
1798 if (1 == ctdb->tunable.deterministic_public_ips) {
1799 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1800 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1801 tmp_ip->pnn = i%nodemap->num;
1806 /* mark all public addresses with a masked node as being served by
1809 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1810 if (tmp_ip->pnn == -1) {
1813 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1818 /* verify that the assigned nodes can serve that public ip
1819 and set it to -1 if not
1821 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1822 if (tmp_ip->pnn == -1) {
1825 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1826 /* this node can not serve this ip. */
1831 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1832 lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
1835 /* now we must redistribute all public addresses with takeover node
1836 -1 among the nodes available
1840 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1841 lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
1843 basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
1846 /* If we dont want ips to fail back after a node becomes healthy
1847 again, we wont even try to reallocat the ip addresses so that
1848 they are evenly spread out.
1849 This can NOT be used at the same time as DeterministicIPs !
1851 if (1 == ctdb->tunable.no_ip_failback) {
1852 if (1 == ctdb->tunable.deterministic_public_ips) {
1853 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1859 /* now, try to make sure the ip addresses are evenly distributed
// The action taken when a failback helper returns true is omitted here.
1862 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1863 if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
1867 if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
1872 /* finished distributing the public addresses, now just send the
1873 info out to the nodes
1877 /* at this point ->pnn is the node which will own each IP
1878 or -1 if there is no node that can cover this ip
// ctdb_takeover_run: computes the new address layout with
// ctdb_takeover_run_core and pushes it out in three steps:
//   1. release controls to every connected node for each address that the
//      node is not meant to hold,
//   2. takeover controls to the node chosen for each assigned address,
//   3. a CTDB_CONTROL_RUN_EVENTSCRIPTS control carrying the string
//      ipreallocated, sent to the nodes from list_of_connected_nodes.
// Steps 1 and 2 each collect their controls in a client_async_data and wait
// for them with ctdb_client_async_wait.
// AF_INET addresses go out with the IPv4-only controls and the
// ctdb_public_ipv4 payload; all other addresses use ctdb_public_ip.
// NOTE(review): return statements, some declarations (i, data, nodes) and
// the tail of several calls are omitted from this listing.
1885 make any IP alias changes for public addresses that are necessary
1887 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1890 struct ctdb_public_ip ip;
1891 struct ctdb_public_ipv4 ipv4;
1893 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1895 struct timeval timeout;
1896 struct client_async_data *async_data;
1897 struct ctdb_client_control_state *state;
// Every error path visible below frees tmp_ctx before leaving.
1898 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1901 * ip failover is completely disabled, just send out the
1902 * ipreallocated event.
1904 if (ctdb->tunable.disable_ip_failover != 0) {
1910 /* Do the IP reassignment calculations */
1911 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
1913 /* now tell all nodes to delete any alias that they should not
1914 have. This will be a NOOP on nodes that don't currently
1915 hold the given alias */
1916 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1917 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1919 for (i=0;i<nodemap->num;i++) {
1920 /* don't talk to unconnected nodes, but do talk to banned nodes */
1921 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1925 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1926 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1927 /* This node should be serving this
1928 vnn so dont tell it to release the ip
1932 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1933 ipv4.pnn = tmp_ip->pnn;
1934 ipv4.sin = tmp_ip->addr.ip;
1936 timeout = TAKEOVER_TIMEOUT();
1937 data.dsize = sizeof(ipv4);
1938 data.dptr = (uint8_t *)&ipv4;
1939 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1940 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1944 ip.pnn = tmp_ip->pnn;
1945 ip.addr = tmp_ip->addr;
1947 timeout = TAKEOVER_TIMEOUT();
1948 data.dsize = sizeof(ip);
1949 data.dptr = (uint8_t *)&ip;
1950 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1951 0, CTDB_CONTROL_RELEASE_IP, 0,
1956 if (state == NULL) {
1957 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1958 talloc_free(tmp_ctx);
1962 ctdb_client_async_add(async_data, state);
1965 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1966 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1967 talloc_free(tmp_ctx);
1970 talloc_free(async_data);
1973 /* tell all nodes to get their own IPs */
1974 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1975 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1976 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1977 if (tmp_ip->pnn == -1) {
1978 /* this IP won't be taken over */
1982 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1983 ipv4.pnn = tmp_ip->pnn;
1984 ipv4.sin = tmp_ip->addr.ip;
1986 timeout = TAKEOVER_TIMEOUT();
1987 data.dsize = sizeof(ipv4);
1988 data.dptr = (uint8_t *)&ipv4;
1989 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1990 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1994 ip.pnn = tmp_ip->pnn;
1995 ip.addr = tmp_ip->addr;
1997 timeout = TAKEOVER_TIMEOUT();
1998 data.dsize = sizeof(ip);
1999 data.dptr = (uint8_t *)&ip;
2000 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2001 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2005 if (state == NULL) {
2006 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2007 talloc_free(tmp_ctx);
2011 ctdb_client_async_add(async_data, state);
2013 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2014 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2015 talloc_free(tmp_ctx);
2020 /* tell all nodes to update natgw */
2021 /* send the flags update natgw on all connected nodes */
// The payload is the event name including its terminating NUL byte.
2022 data.dptr = discard_const("ipreallocated");
2023 data.dsize = strlen((char *)data.dptr) + 1;
2024 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2025 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
2026 nodes, 0, TAKEOVER_TIMEOUT(),
2030 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
2033 talloc_free(tmp_ctx);
// ctdb_client_ip_destructor: logs the address and client id of the record
// and unlinks it from ip->ctdb->client_ip_list.
// It is installed with talloc_set_destructor in ctdb_control_tcp_client.
// NOTE(review): the return statement is omitted from this listing.
2039 destroy a ctdb_client_ip structure
2041 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2043 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2044 ctdb_addr_to_str(&ip->addr),
2045 ntohs(ip->addr.ip.sin_port),
2048 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
// ctdb_control_tcp_client: registers a TCP connection reported by a local
// client and announces it to the connected nodes.
// Steps visible in this listing:
//   - accept a payload of exactly sizeof(struct ctdb_control_tcp) (old
//     format, copied into new_addr) or sizeof(struct ctdb_control_tcp_addr),
//   - canonicalize source and destination with ctdb_canonicalize_ip,
//   - look up the public address record for the destination,
//   - refuse the registration when vnn->pnn differs from ctdb->pnn,
//   - allocate a ctdb_client_ip and a ctdb_tcp_list entry as talloc children
//     of client and link them into their lists,
//   - broadcast CTDB_CONTROL_TCP_ADD with CTDB_CTRL_FLAG_NOREPLY.
// NOTE(review): client comes from ctdb_reqid_find and is dereferenced below
// (client->pid, client->tcp_list); no NULL check is visible in this listing -
// confirm against the full file.
// NOTE(review): return statements, case labels and some declarations (data,
// ret, the indata parameter) are omitted from this listing.
2053 called by a client to inform us of a TCP connection that it is managing
2054 that should tickled with an ACK when IP takeover is done
2055 we handle both the old ipv4 style of packets as well as the new ipv4/6
2058 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2061 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2062 struct ctdb_control_tcp *old_addr = NULL;
2063 struct ctdb_control_tcp_addr new_addr;
2064 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2065 struct ctdb_tcp_list *tcp;
2066 struct ctdb_tcp_connection t;
2069 struct ctdb_client_ip *ip;
2070 struct ctdb_vnn *vnn;
2071 ctdb_sock_addr addr;
// The payload size alone selects the wire format.
2073 switch (indata.dsize) {
2074 case sizeof(struct ctdb_control_tcp):
2075 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2076 ZERO_STRUCT(new_addr);
2077 tcp_sock = &new_addr;
2078 tcp_sock->src.ip = old_addr->src;
2079 tcp_sock->dest.ip = old_addr->dest;
2081 case sizeof(struct ctdb_control_tcp_addr):
2082 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2085 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2086 "to ctdb_control_tcp_client. size was %d but "
2087 "only allowed sizes are %lu and %lu\n",
2089 (long unsigned)sizeof(struct ctdb_control_tcp),
2090 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
// addr is used as a temporary copy so each address can be canonicalized
// back into its own slot.
2094 addr = tcp_sock->src;
2095 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2096 addr = tcp_sock->dest;
2097 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2100 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2101 vnn = find_public_ip_vnn(ctdb, &addr);
2103 switch (addr.sa.sa_family) {
// For IPv4 the error is logged only when the address is not the loopback
// address.
2105 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2106 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2107 ctdb_addr_to_str(&addr)));
2111 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2112 ctdb_addr_to_str(&addr)));
2115 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2121 if (vnn->pnn != ctdb->pnn) {
2122 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2123 ctdb_addr_to_str(&addr),
2124 client_id, client->pid));
2125 /* failing this call will tell smbd to die */
2129 ip = talloc(client, struct ctdb_client_ip);
2130 CTDB_NO_MEMORY(ctdb, ip);
2134 ip->client_id = client_id;
2135 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2136 DLIST_ADD(ctdb->client_ip_list, ip);
2138 tcp = talloc(client, struct ctdb_tcp_list);
2139 CTDB_NO_MEMORY(ctdb, tcp);
2141 tcp->connection.src_addr = tcp_sock->src;
2142 tcp->connection.dst_addr = tcp_sock->dest;
2144 DLIST_ADD(client->tcp_list, tcp);
// t is the payload of the broadcast sent at the end of the function.
2146 t.src_addr = tcp_sock->src;
2147 t.dst_addr = tcp_sock->dest;
2149 data.dptr = (uint8_t *)&t;
2150 data.dsize = sizeof(t);
2152 switch (addr.sa.sa_family) {
2154 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2155 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2156 ctdb_addr_to_str(&tcp_sock->src),
2157 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2160 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2161 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2162 ctdb_addr_to_str(&tcp_sock->src),
2163 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2166 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2170 /* tell all nodes about this tcp connection */
2171 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2172 CTDB_CONTROL_TCP_ADD,
2173 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2175 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
// ctdb_tcp_find: linear search of array->connections for an entry whose
// source and destination both match tcp according to ctdb_same_sockaddr.
// A match is returned as a pointer into the array itself, not a copy.
// array may be NULL; that case is tested before the loop.
// NOTE(review): the return statements for the NULL and not-found cases are
// omitted from this listing.
2183 find a tcp address on a list
2185 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2186 struct ctdb_tcp_connection *tcp)
2190 if (array == NULL) {
2194 for (i=0;i<array->num;i++) {
2195 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2196 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2197 return &array->connections[i];
// ctdb_control_tcp_add: records a connection (tickle) against the public
// address it is connected to.
// indata.dptr is read as a struct ctdb_tcp_connection; its dst_addr selects
// the public address record.
// When tcp_update_needed is true, vnn->tcp_update_needed is set after the
// connection has been stored.
// Three paths are visible: the first connection for an address creates the
// array, a connection that ctdb_tcp_find already knows is only logged, and
// any other connection is appended by growing the connections array.
// NOTE(review): indata.dsize is not checked in any visible line before
// indata.dptr is dereferenced - confirm whether the caller validates it.
// NOTE(review): return statements, the num updates and some arguments are
// omitted from this listing.
2206 called by a daemon to inform us of a TCP connection that one of its
2207 clients managing that should tickled with an ACK when IP takeover is
2210 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2212 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2213 struct ctdb_tcp_array *tcparray;
2214 struct ctdb_tcp_connection tcp;
2215 struct ctdb_vnn *vnn;
2217 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2219 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2220 ctdb_addr_to_str(&p->dst_addr)));
2226 tcparray = vnn->tcp_array;
2228 /* If this is the first tickle */
// The array header is allocated on ctdb->nodes; the connections storage is
// a separate allocation parented to the header.
2229 if (tcparray == NULL) {
2230 tcparray = talloc_size(ctdb->nodes,
2231 offsetof(struct ctdb_tcp_array, connections) +
2232 sizeof(struct ctdb_tcp_connection) * 1);
2233 CTDB_NO_MEMORY(ctdb, tcparray);
2234 vnn->tcp_array = tcparray;
2237 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2238 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2240 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2241 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2244 if (tcp_update_needed) {
2245 vnn->tcp_update_needed = true;
2251 /* Do we already have this tickle ?*/
2252 tcp.src_addr = p->src_addr;
2253 tcp.dst_addr = p->dst_addr;
2254 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2255 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2256 ctdb_addr_to_str(&tcp.dst_addr),
2257 ntohs(tcp.dst_addr.ip.sin_port),
2262 /* A new tickle, we must add it to the array */
// The result of talloc_realloc is stored straight back into
// tcparray->connections before it is checked.
2263 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2264 struct ctdb_tcp_connection,
2266 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2268 vnn->tcp_array = tcparray;
2269 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2270 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2273 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2274 ctdb_addr_to_str(&tcp.dst_addr),
2275 ntohs(tcp.dst_addr.ip.sin_port),
2278 if (tcp_update_needed) {
2279 vnn->tcp_update_needed = true;
// ctdb_remove_tcp_connection: drops conn from the connection array of the
// public address that conn->dst_addr belongs to.
// Nothing is removed when the address is not a public address, when the
// address has no array, or when ctdb_tcp_find does not know the connection;
// each of those cases only logs.
// Removal overwrites the matching slot with the last entry and decrements
// num, so the order of the remaining entries is not preserved.
// NOTE(review): return statements and the NULL tests on vnn and tcpp are
// among the lines omitted from this listing.
2287 called by a daemon to inform us of a TCP connection that one of its
2288 clients managing that should tickled with an ACK when IP takeover is
2291 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2293 struct ctdb_tcp_connection *tcpp;
2294 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2297 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2298 ctdb_addr_to_str(&conn->dst_addr)));
2302 /* if the array is empty we cant remove it
2303 and we dont need to do anything
2305 if (vnn->tcp_array == NULL) {
2306 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2307 ctdb_addr_to_str(&conn->dst_addr),
2308 ntohs(conn->dst_addr.ip.sin_port)));
2313 /* See if we know this connection
2314 if we dont know this connection then we dont need to do anything
2316 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2318 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2319 ctdb_addr_to_str(&conn->dst_addr),
2320 ntohs(conn->dst_addr.ip.sin_port)));
2325 /* We need to remove this entry from the array.
2326 Instead of allocating a new array and copying data to it
2327 we cheat and just copy the last entry in the existing array
2328 to the entry that is to be removed and just shring the
2331 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2332 vnn->tcp_array->num--;
2334 /* If we deleted the last entry we also need to remove the entire array
2336 if (vnn->tcp_array->num == 0) {
2337 talloc_free(vnn->tcp_array);
2338 vnn->tcp_array = NULL;
2341 vnn->tcp_update_needed = true;
2343 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2344 ctdb_addr_to_str(&conn->src_addr),
2345 ntohs(conn->src_addr.ip.sin_port)));
// ctdb_control_tcp_remove: control entry point that reads indata.dptr as a
// struct ctdb_tcp_connection and passes it to ctdb_remove_tcp_connection.
// NOTE(review): indata.dsize is not checked in the visible lines, and the
// return statement is omitted from this listing.
2350 called by a daemon to inform us of a TCP connection that one of its
2351 clients used are no longer needed in the tickle database
2353 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2355 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2357 ctdb_remove_tcp_connection(ctdb, conn);
// ctdb_control_startup: placeholder for the control received when a daemon
// restarts. The only visible body line is the XXX comment, so the visible
// code does not send any tickles yet.
// NOTE(review): the return statement is omitted from this listing.
2364 called when a daemon restarts - send all tickes for all public addresses
2365 we are serving immediately to the new node.
2367 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2369 /*XXX here we should send all tickles we are serving to the new node */
// ctdb_takeover_client_destructor_hook: empties client->tcp_list. Each entry
// is unlinked from the list and its connection is passed to
// ctdb_remove_tcp_connection.
// The entries themselves are not freed in the visible lines; they are
// allocated as talloc children of client in ctdb_control_tcp_client.
2375 called when a client structure goes away - hook to remove
2376 elements from the tcp_list in all daemons
2378 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2380 while (client->tcp_list) {
2381 struct ctdb_tcp_list *tcp = client->tcp_list;
2382 DLIST_REMOVE(client->tcp_list, tcp);
2383 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
// ctdb_release_all_ips: walks every public address record in ctdb->vnn.
// When ctdb_sys_have_ip reports that the address is not present locally the
// record is only detached from its interface. Otherwise the release-ip event
// script is run with interface name, address and netmask bits as arguments,
// clients using the address are killed, and the interface is detached.
// NOTE(review): lines between the two paths (presumably a continue and
// further tests) are omitted from this listing.
2389 release all IPs on shutdown
2391 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2393 struct ctdb_vnn *vnn;
2395 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2396 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2397 ctdb_vnn_unassign_iface(ctdb, vnn);
2403 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2404 ctdb_vnn_iface_string(vnn),
2405 ctdb_addr_to_str(&vnn->public_address),
2406 vnn->public_netmask_bits);
2407 release_kill_clients(ctdb, &vnn->public_address);
2408 ctdb_vnn_unassign_iface(ctdb, vnn);
// ctdb_control_get_public_ips: builds a struct ctdb_all_public_ips reply and
// returns it through outdata (dptr and dsize).
// When the request carries CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE, records for
// which ctdb_vnn_available is false are left out.
// The buffer is sized for num entries and zero-filled, allocated as a talloc
// child of outdata; the reported length is then recomputed from i, the
// number of entries actually written.
// NOTE(review): the counter updates, the assignment of the entry count in
// the reply and the return statement are omitted from this listing.
2414 get list of public IPs
2416 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2417 struct ctdb_req_control *c, TDB_DATA *outdata)
2420 struct ctdb_all_public_ips *ips;
2421 struct ctdb_vnn *vnn;
2422 bool only_available = false;
2424 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2425 only_available = true;
2428 /* count how many public ip structures we have */
2430 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2434 len = offsetof(struct ctdb_all_public_ips, ips) +
2435 num*sizeof(struct ctdb_public_ip);
2436 ips = talloc_zero_size(outdata, len);
2437 CTDB_NO_MEMORY(ctdb, ips);
2440 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2441 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2444 ips->ips[i].pnn = vnn->pnn;
2445 ips->ips[i].addr = vnn->public_address;
2449 len = offsetof(struct ctdb_all_public_ips, ips) +
2450 i*sizeof(struct ctdb_public_ip);
2452 outdata->dsize = len;
2453 outdata->dptr = (uint8_t *)ips;
// ctdb_control_get_public_ipsv4: IPv4-only variant of the public address
// listing. It builds a struct ctdb_all_public_ipsv4 and returns it through
// outdata.
// Both passes over ctdb->vnn test for records whose family is not AF_INET.
// Unlike ctdb_control_get_public_ips, outdata is filled in before the
// entries are written and the length is not recomputed afterwards.
// NOTE(review): the counter updates, the bodies of the family tests and the
// return statement are omitted from this listing.
2460 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2462 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2463 struct ctdb_req_control *c, TDB_DATA *outdata)
2466 struct ctdb_all_public_ipsv4 *ips;
2467 struct ctdb_vnn *vnn;
2469 /* count how many public ip structures we have */
2471 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2472 if (vnn->public_address.sa.sa_family != AF_INET) {
2478 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2479 num*sizeof(struct ctdb_public_ipv4);
2480 ips = talloc_zero_size(outdata, len);
2481 CTDB_NO_MEMORY(ctdb, ips);
2483 outdata->dsize = len;
2484 outdata->dptr = (uint8_t *)ips;
2488 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2489 if (vnn->public_address.sa.sa_family != AF_INET) {
2492 ips->ips[i].pnn = vnn->pnn;
2493 ips->ips[i].sin = vnn->public_address.ip;
// ctdb_control_get_public_ip_info: reports one public address together with
// the interfaces configured for it.
// indata.dptr is read as a ctdb_sock_addr. The address is looked up with
// find_public_ip_vnn, and ctdb->single_ip_vnn is also compared against it
// with ctdb_same_ip.
// The reply is a struct ctdb_control_public_ip_info allocated as a talloc
// child of outdata. active_idx starts at 0xFFFFFFFF and is replaced by the
// index of the interface that equals vnn->iface, if any.
// NOTE(review): the indata and outdata parameters, the counter updates, the
// NULL tests and the return statements are omitted from this listing.
2500 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2501 struct ctdb_req_control *c,
2506 ctdb_sock_addr *addr;
2507 struct ctdb_control_public_ip_info *info;
2508 struct ctdb_vnn *vnn;
2510 addr = (ctdb_sock_addr *)indata.dptr;
2512 vnn = find_public_ip_vnn(ctdb, addr);
2514 /* if it is not a public ip it could be our 'single ip' */
2515 if (ctdb->single_ip_vnn) {
2516 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2517 vnn = ctdb->single_ip_vnn;
2522 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2523 "'%s'not a public address\n",
2524 ctdb_addr_to_str(addr)));
2528 /* count how many public ip structures we have */
// vnn->ifaces is walked until its terminating NULL entry.
2530 for (;vnn->ifaces[num];) {
2534 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2535 num*sizeof(struct ctdb_control_iface_info);
2536 info = talloc_zero_size(outdata, len);
2537 CTDB_NO_MEMORY(ctdb, info);
2539 info->ip.addr = vnn->public_address;
2540 info->ip.pnn = vnn->pnn;
2541 info->active_idx = 0xFFFFFFFF;
2543 for (i=0; vnn->ifaces[i]; i++) {
2544 struct ctdb_iface *cur;
2546 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2548 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2552 if (vnn->iface == cur) {
2553 info->active_idx = i;
// NOTE(review): unbounded strcpy; this relies on cur->name fitting in the
// destination name field - confirm the size limit is enforced where
// interfaces are added.
2555 strcpy(info->ifaces[i].name, cur->name);
2556 info->ifaces[i].link_state = cur->link_up;
2557 info->ifaces[i].references = cur->references;
2560 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2561 i*sizeof(struct ctdb_control_iface_info);
2563 outdata->dsize = len;
2564 outdata->dptr = (uint8_t *)info;
// ctdb_control_get_ifaces: returns one ctdb_control_iface_info entry (name,
// link_state, references) for every interface in ctdb->ifaces.
// The reply is a zero-filled struct ctdb_control_get_ifaces allocated as a
// talloc child of outdata; the reported length is recomputed from i after
// the entries are written.
// NOTE(review): the outdata parameter, the counter updates and the return
// statement are omitted from this listing.
2569 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2570 struct ctdb_req_control *c,
2574 struct ctdb_control_get_ifaces *ifaces;
2575 struct ctdb_iface *cur;
2577 /* count how many public ip structures we have */
2579 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2583 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2584 num*sizeof(struct ctdb_control_iface_info);
2585 ifaces = talloc_zero_size(outdata, len);
2586 CTDB_NO_MEMORY(ctdb, ifaces);
2589 for (cur=ctdb->ifaces;cur;cur=cur->next) {
// NOTE(review): unbounded strcpy; relies on cur->name fitting in the
// destination name field - confirm.
2590 strcpy(ifaces->ifaces[i].name, cur->name);
2591 ifaces->ifaces[i].link_state = cur->link_up;
2592 ifaces->ifaces[i].references = cur->references;
2596 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2597 i*sizeof(struct ctdb_control_iface_info);
2599 outdata->dsize = len;
2600 outdata->dptr = (uint8_t *)ifaces;
// ctdb_control_set_iface_link: control that sets the recorded link state of
// a known interface.
// indata.dptr is read as a struct ctdb_control_iface_info. The request is
// checked in this order: the name byte at index CTDB_IFACE_SIZE must be NUL,
// link_state must be one of the values accepted by the switch, references
// must be 0, and the name must resolve through ctdb_find_iface.
// When the requested state equals the current one no change is logged.
// Otherwise the change is logged at DEBUG_ERR if the link was up before and
// at DEBUG_NOTICE if it was down, and iface->link_up is updated.
// NOTE(review): the indata parameter, the switch cases and the return
// statements are omitted from this listing.
2605 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2606 struct ctdb_req_control *c,
2609 struct ctdb_control_iface_info *info;
2610 struct ctdb_iface *iface;
2611 bool link_up = false;
2613 info = (struct ctdb_control_iface_info *)indata.dptr;
// NOTE(review): this reads name[CTDB_IFACE_SIZE], so the name field must
// hold more than CTDB_IFACE_SIZE bytes - confirm against the struct
// declaration.
2615 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2616 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2617 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2618 len, len, info->name));
2622 switch (info->link_state) {
2630 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2631 (unsigned int)info->link_state));
2635 if (info->references != 0) {
2636 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2637 (unsigned int)info->references));
2641 iface = ctdb_find_iface(ctdb, info->name);
2642 if (iface == NULL) {
2643 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2648 if (link_up == iface->link_up) {
2652 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2653 ("iface[%s] has changed it's link status %s => %s\n",
2655 iface->link_up?"up":"down",
2656 link_up?"up":"down"));
2658 iface->link_up = link_up;
// struct ctdb_kill_tcp: state shared by the connection-killing callbacks
// (capture_tcp_handler and ctdb_tickle_sentenced_connections).
// NOTE(review): capture_tcp_handler also reads capture_fd and private_data
// from this struct; their declarations are among the lines omitted from this
// listing.
2664 structure containing the listening socket and the list of tcp connections
2665 that the ctdb daemon is to kill
2667 struct ctdb_kill_tcp {
// Public address record; ctdb_killtcp_destructor clears its killtcp pointer.
2668 struct ctdb_vnn *vnn;
// Daemon context; its event context is used to re-arm the tickle timer.
2669 struct ctdb_context *ctdb;
// fd event handle; presumably the one that drives capture_tcp_handler -
// TODO confirm.
2671 struct fd_event *fde;
// Tree of struct ctdb_killtcp_con entries looked up with killtcp_key.
2672 trbt_tree_t *connections;
// struct ctdb_killtcp_con: one connection queued for a reset, identified by
// its source and destination socket addresses.
// NOTE(review): tickle_connection_traverse reads a count member; its
// declaration is among the lines omitted from this listing.
2677 a tcp connection that is to be killed
2679 struct ctdb_killtcp_con {
2680 ctdb_sock_addr src_addr;
2681 ctdb_sock_addr dst_addr;
// Back pointer to the owning ctdb_kill_tcp.
2683 struct ctdb_kill_tcp *killtcp;
// killtcp_key: builds the KILLTCP_KEYLEN-word lookup key for a source and
// destination socket pair.
// The key lives in a function-local static buffer that is zeroed on every
// call, so a later call overwrites the key built by an earlier one.
// Addresses and ports are copied as stored in the sockaddr, without byte
// order conversion. The branch reading the ip members fills words 0 to 3;
// the branch reading the ip6 members fills all ten words.
// Mismatched or unknown address families are logged.
// NOTE(review): the case labels and the return statements are omitted from
// this listing, so what is returned on the error paths is not shown.
2686 /* this function is used to create a key to represent this socketpair
2687 in the killtcp tree.
2688 this key is used to insert and lookup matching socketpairs that are
2689 to be tickled and RST
2691 #define KILLTCP_KEYLEN 10
2692 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2694 static uint32_t key[KILLTCP_KEYLEN];
2696 bzero(key, sizeof(key));
2698 if (src->sa.sa_family != dst->sa.sa_family) {
2699 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2703 switch (src->sa.sa_family) {
2705 key[0] = dst->ip.sin_addr.s_addr;
2706 key[1] = src->ip.sin_addr.s_addr;
2707 key[2] = dst->ip.sin_port;
2708 key[3] = src->ip.sin_port;
2711 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2712 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2713 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2714 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2715 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2716 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2717 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2718 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2719 key[8] = dst->ip6.sin6_port;
2720 key[9] = src->ip6.sin6_port;
2723 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
// capture_tcp_handler: fd event callback for the capture socket.
// private_data is the struct ctdb_kill_tcp. Events without EVENT_FD_READ are
// tested for first. A packet is parsed with ctdb_sys_read_tcp_packet, and
// the resulting address pair is looked up in killtcp->connections using
// killtcp_key. For a known connection a packet is sent with
// ctdb_sys_send_tcp, from the stored destination to the stored source,
// carrying the captured ack_seq and seq and a final argument of 1.
// NOTE(review): the return statements, part of the
// ctdb_sys_read_tcp_packet argument list and whatever follows the send
// (presumably removal of con) are omitted from this listing.
2731 called when we get a read event on the raw socket
2733 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2734 uint16_t flags, void *private_data)
2736 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2737 struct ctdb_killtcp_con *con;
2738 ctdb_sock_addr src, dst;
2739 uint32_t ack_seq, seq;
2741 if (!(flags & EVENT_FD_READ)) {
2745 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2746 killtcp->private_data,
2748 &ack_seq, &seq) != 0) {
2749 /* probably a non-tcp ACK packet */
2753 /* check if we have this guy in our list of connections
2756 con = trbt_lookuparray32(killtcp->connections,
2757 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2759 /* no this was some other packet we can just ignore */
2763 /* This one has been tickled !
2764 now reset him and remove him from the list.
2766 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2767 ntohs(con->dst_addr.ip.sin_port),
2768 ctdb_addr_to_str(&con->src_addr),
2769 ntohs(con->src_addr.ip.sin_port)));
2771 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
// tickle_connection_traverse: per-entry callback used by
// ctdb_tickle_sentenced_connections when it walks killtcp->connections.
// data is the struct ctdb_killtcp_con. param is the talloc context that
// collects entries to delete after the traverse.
// An entry whose count has reached 5 is reparented to param with
// talloc_steal instead of being freed here, because deleting during the
// traverse is not allowed. Other entries are tickled again.
// NOTE(review): the count update, the name of the send call and the return
// after the steal are omitted from this listing.
2776 /* when traversing the list of all tcp connections to send tickle acks to
2777 (so that we can capture the ack coming back and kill the connection
2779 this callback is called for each connection we are currently trying to kill
2781 static void tickle_connection_traverse(void *param, void *data)
2783 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2785 /* have tried too many times, just give up */
2786 if (con->count >= 5) {
2787 /* can't delete in traverse: reparent to delete_cons */
2788 talloc_steal(param, con);
2792 /* otherwise, try tickling it again */
2795 (ctdb_sock_addr *)&con->dst_addr,
2796 (ctdb_sock_addr *)&con->src_addr,
/*
 * Timed-event callback that drives the kill list of one struct ctdb_kill_tcp.
 *
 * private_data is the struct ctdb_kill_tcp. Every entry in
 * killtcp->connections is visited with tickle_connection_traverse(), using a
 * temporary talloc context (delete_cons) as the traverse parameter; freeing
 * that context afterwards releases whatever the callback re-parented onto it.
 * When the tree pointer or its root is NULL the whole killtcp structure is
 * freed. The visible code otherwise re-arms this callback one second ahead,
 * with the timer allocated on killtcp.
 *
 * NOTE(review): the statement that ends the "nothing left" branch is not
 * visible here; presumably it returns before the timer is re-armed, since
 * killtcp has just been freed. Confirm.
 */
2802 called every second until all sentenced connections have been reset
2804 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2805 struct timeval t, void *private_data)
2807 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
/* scratch parent for connections the traverse callback gives up on */
2808 void *delete_cons = talloc_new(NULL);
2810 /* loop over all connections sending tickle ACKs */
2811 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2813 /* now we've finished traverse, it's safe to do deletion. */
2814 talloc_free(delete_cons);
2816 /* If there are no more connections to kill we can remove the
2817 entire killtcp structure
2819 if ( (killtcp->connections == NULL) ||
2820 (killtcp->connections->root == NULL) ) {
2821 talloc_free(killtcp);
2825 /* try tickling them again in a seconds time
2827 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2828 ctdb_tickle_sentenced_connections, killtcp);
/*
 * talloc destructor for struct ctdb_kill_tcp (installed with
 * talloc_set_destructor() in ctdb_killtcp_add_connection()).
 *
 * Clears the owning vnn's killtcp pointer so the vnn does not keep referring
 * to the structure being destroyed. The lines between the signature and this
 * assignment, and the return statement, are not visible in this excerpt.
 */
2832 destroy the killtcp structure
2834 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2837 killtcp->vnn->killtcp = NULL;
/*
 * Callback handed to trbt_insertarray32_callback() by
 * ctdb_killtcp_add_connection(), which passes the new connection as the
 * callback argument. The function body is not visible in this excerpt; going
 * by the comment below it replaces whatever the tree node held with the new
 * entry. Presumably parm is the new connection and data the previous entry;
 * confirm against the rb_tree implementation.
 */
2843 /* nothing fancy here, just unconditionally replace any existing
2844 connection structure with the new one.
2846 dont even free the old one if it did exist, that one is talloc_stolen
2847 by the same node in the tree anyway and will be deleted when the new data
2850 static void *add_killtcp_callback(void *parm, void *data)
/*
 * Queue one TCP connection for reset.
 *
 * ctdb: daemon context.
 * s, d: the two endpoints of the connection (their declarations are on
 *       signature lines not visible in this excerpt; both are canonicalized
 *       into the local copies src and dst before use).
 *
 * Visible steps:
 *  - find the vnn via find_public_ip_vnn() on dst and on src (the condition
 *    between the two lookups is not visible; presumably the second is a
 *    fallback), then try ctdb->single_ip_vnn when its address equals dst;
 *    the error log below covers the case where no vnn is found.
 *  - on first use for a vnn, allocate a struct ctdb_kill_tcp on ctdb with
 *    capture_fd = -1 and a connection tree, publish it in vnn->killtcp and
 *    install ctdb_killtcp_destructor().
 *  - allocate the connection record as a talloc child of killtcp and insert
 *    it keyed by killtcp_key(dst, src). Note the order: capture_tcp_handler()
 *    looks up killtcp_key(src, dst) of the captured packet, presumably because
 *    the captured ACK travels in the opposite direction.
 *  - lazily open the capture socket on the vnn's interface, and when no fd
 *    event exists yet register capture_tcp_handler() (auto-closing the fd)
 *    plus a one-second timer for ctdb_tickle_sentenced_connections().
 *  - the last two lines free vnn->killtcp and clear the pointer; this looks
 *    like the failure clean-up path, but its label is not visible here.
 *
 * Return values are not visible in this excerpt. CTDB_NO_MEMORY() follows each
 * allocation; its failure behaviour is defined elsewhere.
 */
2856 add a tcp socket to the list of connections we want to RST
2858 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2862 ctdb_sock_addr src, dst;
2863 struct ctdb_kill_tcp *killtcp;
2864 struct ctdb_killtcp_con *con;
2865 struct ctdb_vnn *vnn;
/* work on canonicalized copies of both endpoints */
2867 ctdb_canonicalize_ip(s, &src);
2868 ctdb_canonicalize_ip(d, &dst);
2870 vnn = find_public_ip_vnn(ctdb, &dst);
2872 vnn = find_public_ip_vnn(ctdb, &src);
2875 /* if it is not a public ip it could be our 'single ip' */
2876 if (ctdb->single_ip_vnn) {
2877 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2878 vnn = ctdb->single_ip_vnn;
2883 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2887 killtcp = vnn->killtcp;
2889 /* If this is the first connection to kill we must allocate
2892 if (killtcp == NULL) {
2893 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2894 CTDB_NO_MEMORY(ctdb, killtcp);
2897 killtcp->ctdb = ctdb;
2898 killtcp->capture_fd = -1;
2899 killtcp->connections = trbt_create(killtcp, 0);
2901 vnn->killtcp = killtcp;
2902 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2907 /* create a structure that describes this connection we want to
2908 RST and store it in killtcp->connections
2910 con = talloc(killtcp, struct ctdb_killtcp_con);
2911 CTDB_NO_MEMORY(ctdb, con);
2912 con->src_addr = src;
2913 con->dst_addr = dst;
2915 con->killtcp = killtcp;
2918 trbt_insertarray32_callback(killtcp->connections,
2919 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2920 add_killtcp_callback, con);
2923 If we dont have a socket to listen on yet we must create it
2925 if (killtcp->capture_fd == -1) {
2926 const char *iface = ctdb_vnn_iface_string(vnn);
2927 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2928 if (killtcp->capture_fd == -1) {
2929 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2930 "socket on iface '%s' for killtcp (%s)\n",
2931 iface, strerror(errno)));
2937 if (killtcp->fde == NULL) {
2938 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2940 capture_tcp_handler, killtcp);
2941 tevent_fd_set_auto_close(killtcp->fde);
2943 /* We also need to set up some events to tickle all these connections
2944 until they are all reset
2946 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2947 ctdb_tickle_sentenced_connections, killtcp);
2950 /* tickle him once now */
2959 talloc_free(vnn->killtcp);
2960 vnn->killtcp = NULL;
/*
 * Control handler: queue the connection described by indata for reset.
 *
 * indata.dptr is interpreted as a struct ctdb_control_killtcp and its
 * src_addr/dst_addr are forwarded to ctdb_killtcp_add_connection(), whose
 * result is returned unchanged.
 *
 * NOTE(review): no check of indata.dsize is visible in this function;
 * presumably the control dispatcher validates the size. Confirm.
 */
2965 kill a TCP connection.
2967 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2969 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2971 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
/*
 * Control handler: replace the stored tickle list of one public address.
 *
 * indata is interpreted as a struct ctdb_control_tcp_tickle_list. Its length
 * is checked twice: it must cover the fixed part up to tickles.connections,
 * and then that fixed part plus tickles.num connection records. The vnn is
 * looked up from list->addr; an address that is not public is reported at
 * DEBUG_INFO. The previous vnn->tcp_array is freed and cleared before the
 * replacement is allocated, filled by memcpy() from the request, and attached
 * to the vnn.
 *
 * Return values are not visible in this excerpt.
 *
 * NOTE(review): because the old list is released first, an allocation failure
 * afterwards would leave the vnn without any tickle list, assuming
 * CTDB_NO_MEMORY() returns early. Confirm this is intended.
 */
2975 called by a daemon to inform us of the entire list of TCP tickles for
2976 a particular public address.
2977 this control should only be sent by the node that is currently serving
2978 that public address.
2980 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2982 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2983 struct ctdb_tcp_array *tcparray;
2984 struct ctdb_vnn *vnn;
2986 /* We must at least have tickles.num or else we cant verify the size
2987 of the received data blob
2989 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2990 tickles.connections)) {
2991 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2995 /* verify that the size of data matches what we expect */
2996 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2997 tickles.connections)
2998 + sizeof(struct ctdb_tcp_connection)
2999 * list->tickles.num) {
3000 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3004 vnn = find_public_ip_vnn(ctdb, &list->addr);
3006 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3007 ctdb_addr_to_str(&list->addr)));
3012 /* remove any old ticklelist we might have */
3013 talloc_free(vnn->tcp_array);
3014 vnn->tcp_array = NULL;
/* built under ctdb->nodes first; re-parented to the vnn by the talloc_steal() below */
3016 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3017 CTDB_NO_MEMORY(ctdb, tcparray);
3019 tcparray->num = list->tickles.num;
3021 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3022 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3024 memcpy(tcparray->connections, &list->tickles.connections[0],
3025 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3027 /* We now have a new fresh tickle list array for this vnn */
3028 vnn->tcp_array = talloc_steal(vnn, tcparray);
3034 called to return the full list of tickles for the public address associated
3035 with the provided vnn
/*
 * Control handler: return the tickle list stored for one public address.
 *
 * indata.dptr is interpreted as a ctdb_sock_addr naming the public address.
 * The matching vnn is looked up; an address that is not public is logged at
 * DEBUG_ERR. The reply is allocated with outdata as talloc parent and sized
 * as the fixed part of struct ctdb_control_tcp_tickle_list plus num
 * connection records, which are copied from vnn->tcp_array.
 *
 * Return values, the declaration of num, and the lines around the read of
 * tcparray->num (presumably a guard for a vnn without a tickle list) are not
 * visible in this excerpt.
 */
3037 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3039 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3040 struct ctdb_control_tcp_tickle_list *list;
3041 struct ctdb_tcp_array *tcparray;
3043 struct ctdb_vnn *vnn;
3045 vnn = find_public_ip_vnn(ctdb, addr);
3047 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3048 ctdb_addr_to_str(addr)));
3053 tcparray = vnn->tcp_array;
3055 num = tcparray->num;
/* reply size: fixed header plus num connection records */
3060 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3061 tickles.connections)
3062 + sizeof(struct ctdb_tcp_connection) * num;
3064 outdata->dptr = talloc_size(outdata, outdata->dsize);
3065 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3066 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3069 list->tickles.num = num;
3071 memcpy(&list->tickles.connections[0], tcparray->connections,
3072 sizeof(struct ctdb_tcp_connection) * num);
/*
 * Build a CTDB_CONTROL_SET_TCP_TICKLE_LIST payload from tcparray and send it.
 *
 * The payload is a struct ctdb_control_tcp_tickle_list allocated on ctdb:
 * the fixed part plus num connection records copied from
 * tcparray->connections. It is sent with ctdb_daemon_send_control() using
 * CTDB_CTRL_FLAG_NOREPLY and no callback, and the buffer is freed afterwards.
 * A failed send is logged at DEBUG_ERR.
 *
 * NOTE(review): in the visible lines the control goes to
 * CTDB_BROADCAST_CONNECTED, and neither destnode nor timeout appears; confirm
 * whether those parameters are meant to be used. Return values, and the lines
 * that store addr in the payload and guard the read of tcparray->num, are not
 * visible in this excerpt.
 */
3080 set the list of all tcp tickles for a public address
3082 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3083 struct timeval timeout, uint32_t destnode,
3084 ctdb_sock_addr *addr,
3085 struct ctdb_tcp_array *tcparray)
3089 struct ctdb_control_tcp_tickle_list *list;
3092 num = tcparray->num;
3097 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3098 tickles.connections) +
3099 sizeof(struct ctdb_tcp_connection) * num;
3100 data.dptr = talloc_size(ctdb, data.dsize);
3101 CTDB_NO_MEMORY(ctdb, data.dptr);
3103 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3105 list->tickles.num = num;
3107 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3110 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3111 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3112 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3114 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3118 talloc_free(data.dptr);
/*
 * Periodic timer callback that pushes pending tickle-list updates.
 *
 * private_data is the struct ctdb_context. Every vnn in ctdb->vnn is tested
 * against ctdb->pnn and against its tcp_update_needed flag; for the vnns that
 * qualify, ctdb_ctrl_set_tcp_tickles() is called with
 * CTDB_BROADCAST_CONNECTED and the vnn's public address (two further argument
 * lines are not visible in this excerpt). A failure is logged at DEBUG_ERR.
 * The callback then re-arms itself on ctdb->tickle_update_context after
 * ctdb->tunable.tickle_update_interval seconds.
 *
 * NOTE(review): no visible line clears vnn->tcp_update_needed after a send;
 * confirm it is reset somewhere.
 */
3125 perform tickle updates if required
3127 static void ctdb_update_tcp_tickles(struct event_context *ev,
3128 struct timed_event *te,
3129 struct timeval t, void *private_data)
3131 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3133 struct ctdb_vnn *vnn;
3135 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3136 /* we only send out updates for public addresses that
3139 if (ctdb->pnn != vnn->pnn) {
3142 /* We only send out the updates if we need to */
3143 if (!vnn->tcp_update_needed) {
3146 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3148 CTDB_BROADCAST_CONNECTED,
3149 &vnn->public_address,
3152 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3153 ctdb_addr_to_str(&vnn->public_address)));
3157 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3158 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3159 ctdb_update_tcp_tickles, ctdb);
/*
 * Start the periodic tickle-list update.
 *
 * Creates ctdb->tickle_update_context as a talloc child of ctdb and schedules
 * the first ctdb_update_tcp_tickles() run on it,
 * ctdb->tunable.tickle_update_interval seconds from now. The timer is
 * allocated on that context, so freeing the context should cancel it
 * (assumes standard talloc/tevent ownership; confirm).
 */
3164 start periodic update of tcp tickles
3166 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3168 ctdb->tickle_update_context = talloc_new(ctdb);
3170 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3171 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3172 ctdb_update_tcp_tickles, ctdb);
/*
 * State of one gratuitous-ARP request; used as the private data of
 * send_gratious_arp(). Besides the two members visible here the code also
 * reads arp->iface and arp->count; their declarations are not visible in this
 * excerpt.
 */
3178 struct control_gratious_arp {
/* daemon context; supplies the event loop used to re-arm the timer */
3179 struct ctdb_context *ctdb;
/* address passed to ctdb_sys_send_arp() */
3180 ctdb_sock_addr addr;
/*
 * Timed-event callback that sends one gratuitous ARP and schedules the next.
 *
 * private_data is a struct control_gratious_arp. The ARP for arp->addr is
 * sent on arp->iface with ctdb_sys_send_arp(); a failure is logged at
 * DEBUG_ERR. arp->count is compared with CTDB_ARP_REPEAT to decide when to
 * stop; otherwise the callback re-arms itself CTDB_ARP_INTERVAL seconds
 * ahead, with the timer allocated on arp.
 *
 * NOTE(review): the lines that update arp->count and that end the sequence
 * (presumably freeing arp and returning) are not visible in this excerpt.
 */
3186 send a control_gratuitous arp
3188 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3189 struct timeval t, void *private_data)
3192 struct control_gratious_arp *arp = talloc_get_type(private_data,
3193 struct control_gratious_arp);
3195 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3197 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3198 arp->iface, strerror(errno)));
3203 if (arp->count == CTDB_ARP_REPEAT) {
3208 event_add_timed(arp->ctdb->ev, arp,
3209 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3210 send_gratious_arp, arp);
/*
 * Control handler: start a series of gratuitous ARPs for an address.
 *
 * indata is interpreted as a struct ctdb_control_gratious_arp. It must cover
 * at least the fixed part up to the iface member; a second check compares
 * indata.dsize with that fixed part plus gratious_arp->len (the first line of
 * this comparison is not visible; the log text suggests an exact-size match).
 * A struct control_gratious_arp is then allocated on ctdb, filled with the
 * address and a copy of the interface name, and send_gratious_arp() is
 * scheduled with timeval_zero() as the due time.
 *
 * Return values and the initialisation of arp->ctdb and arp->count are not
 * visible in this excerpt.
 *
 * NOTE(review): talloc_strdup() treats gratious_arp->iface as a
 * NUL-terminated string; no visible check confirms a terminator within len
 * bytes. Confirm.
 */
3217 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3219 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3220 struct control_gratious_arp *arp;
3222 /* verify the size of indata */
3223 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3224 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3225 (unsigned)indata.dsize,
3226 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3230 ( offsetof(struct ctdb_control_gratious_arp, iface)
3231 + gratious_arp->len ) ){
3233 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3234 "but should be %u bytes\n",
3235 (unsigned)indata.dsize,
3236 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3241 arp = talloc(ctdb, struct control_gratious_arp);
3242 CTDB_NO_MEMORY(ctdb, arp);
3245 arp->addr = gratious_arp->addr;
3246 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3247 CTDB_NO_MEMORY(ctdb, arp->iface);
/* the first ARP is scheduled with a zero due time rather than sent inline */
3250 event_add_timed(arp->ctdb->ev, arp,
3251 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address at runtime.
 *
 * indata is interpreted as a struct ctdb_control_ip_iface. It must cover at
 * least the fixed part up to the iface member; a second size check involving
 * pub->len follows (only partly visible here; the log text suggests an
 * exact-size match). The address, mask and interface string are then passed
 * to ctdb_add_public_address(), and a failure is logged at DEBUG_ERR.
 *
 * Return values are not visible in this excerpt.
 */
3256 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3258 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3261 /* verify the size of indata */
3262 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3263 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3267 ( offsetof(struct ctdb_control_ip_iface, iface)
3270 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3271 "but should be %u bytes\n",
3272 (unsigned)indata.dsize,
3273 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3277 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
3280 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
 * Completion callback for the release-ip event started by
 * ctdb_control_del_public_address().
 *
 * Frees private_data, which that caller sets to the temporary talloc context
 * holding the removed vnn. status is not used in the visible lines. The second
 * line of the signature is not visible in this excerpt.
 */
3288 called when releaseip event finishes for del_public_address
3290 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3293 talloc_free(private_data);
/*
 * Control handler: remove a public address at runtime.
 *
 * indata is interpreted as a struct ctdb_control_ip_iface and size-checked in
 * the same two steps as in ctdb_control_add_public_address(). The vnn whose
 * public_address equals pub->addr is unlinked from ctdb->vnn and, if it has an
 * interface, unassigned from it. vnn->pnn is compared with ctdb->pnn (the
 * body of that branch is not visible; presumably an address not held by this
 * node is freed directly). Otherwise the vnn is moved onto a fresh talloc
 * context and a CTDB_EVENT_RELEASE_IP event script is started with the
 * interface name, address and netmask bits; delete_ip_callback() frees the
 * context.
 *
 * Return values are not visible in this excerpt.
 *
 * NOTE(review): ctdb_vnn_unassign_iface() runs before ctdb_vnn_iface_string()
 * is evaluated for the event arguments; confirm the interface name passed to
 * the release-ip script is still the one the address was on.
 */
3296 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3298 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3299 struct ctdb_vnn *vnn;
3302 /* verify the size of indata */
3303 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3304 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3308 ( offsetof(struct ctdb_control_ip_iface, iface)
3311 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3312 "but should be %u bytes\n",
3313 (unsigned)indata.dsize,
3314 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3318 /* walk over all public addresses until we find a match */
3319 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3320 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3321 TALLOC_CTX *mem_ctx;
3323 DLIST_REMOVE(ctdb->vnn, vnn);
3324 if (vnn->iface != NULL) {
3325 ctdb_vnn_unassign_iface(ctdb, vnn);
3327 if (vnn->pnn != ctdb->pnn) {
/* park the vnn on its own context so delete_ip_callback() can free it later */
3332 mem_ctx = talloc_new(ctdb);
3333 talloc_steal(mem_ctx, vnn);
3334 ret = ctdb_event_script_callback(ctdb,
3335 mem_ctx, delete_ip_callback, mem_ctx,
3337 CTDB_EVENT_RELEASE_IP,
3339 ctdb_vnn_iface_string(vnn),
3340 ctdb_addr_to_str(&vnn->public_address),
3341 vnn->public_netmask_bits);
/*
 * Compare a remote node's reported public-IP list with the expected
 * assignment held in ctdb->ip_tree.
 *
 * ctdb: context whose ip_tree holds the expected assignment; a NULL tree is
 *       handled by a separate branch (body not visible).
 * ips:  the list reported by the remote node.
 *
 * Each ips->ips[i].addr is looked up in the tree with ip_key(). The visible
 * log messages cover an address with no tree entry ("reassign ips") and a pnn
 * that disagrees with the tree ("Trigger reallocation"). Entries where either
 * side's pnn is -1 have their own branch whose body is not visible
 * (presumably they are skipped). Return values are not visible in this
 * excerpt.
 */
3352 /* This function is called from the recovery daemon to verify that a remote
3353 node has the expected ip allocation.
3354 This is verified against ctdb->ip_tree
3356 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3358 struct ctdb_public_ip_list *tmp_ip;
3361 if (ctdb->ip_tree == NULL) {
3362 /* dont know the expected allocation yet, assume remote node
3371 for (i=0; i<ips->num; i++) {
3372 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3373 if (tmp_ip == NULL) {
3374 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
3378 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3382 if (tmp_ip->pnn != ips->ips[i].pnn) {
3383 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record in ctdb->ip_tree that the address in ip is now held by ip->pnn.
 *
 * A missing tree and a missing record for ip->addr are each logged at
 * DEBUG_ERR. When the record exists, the change is logged at DEBUG_NOTICE
 * with the old and new node numbers and the record's pnn is overwritten.
 *
 * Return values are not visible in this excerpt, and the function may
 * continue past the last line shown here.
 */
3391 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3393 struct ctdb_public_ip_list *tmp_ip;
3395 if (ctdb->ip_tree == NULL) {
3396 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3400 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3401 if (tmp_ip == NULL) {
3402 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* log the transition before overwriting the old owner */
3406 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3407 tmp_ip->pnn = ip->pnn;