4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/wait.h"
28 #include "../include/ctdb_private.h"
29 #include "../common/rb_tree.h"
/* Timeout built from the takeover_timeout tunable. The expansion refers to
 * a variable named ctdb, which must be in scope wherever the macro is used. */
32 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* Constants used by ctdb_control_send_arp: CTDB_ARP_INTERVAL is the first
 * argument of timeval_current_ofs when the timer is re-armed, and
 * CTDB_ARP_REPEAT is the value arp->count is compared against. */
34 #define CTDB_ARP_INTERVAL 1
35 #define CTDB_ARP_REPEAT 3
38 struct ctdb_iface *prev, *next;
/* Return the name of the interface referenced by vnn->iface.
 * NOTE(review): the lines between the signature and this return are not
 * visible here, so handling of a NULL vnn->iface cannot be confirmed. */
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
47 return vnn->iface->name;
/* Register an interface name in the ctdb->ifaces list.
 * The list is searched for an entry with the same name; a zeroed
 * struct ctdb_iface is then allocated under ctdb, given a talloc copy of
 * the name, and linked in with DLIST_ADD.
 * NOTE(review): what happens when a matching name is found, and the
 * return values, are not visible here. */
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
57 /* Verify that we do not already have an entry for this interface */
58 for (i=ctdb->ifaces;i;i=i->next) {
59 if (strcmp(i->name, iface) == 0) {
64 /* create a new structure for this interface */
65 i = talloc_zero(ctdb, struct ctdb_iface);
66 CTDB_NO_MEMORY_FATAL(ctdb, i);
67 i->name = talloc_strdup(i, iface);
68 CTDB_NO_MEMORY(ctdb, i->name);
71 DLIST_ADD(ctdb->ifaces, i);
/* Search ctdb->ifaces for an entry whose name compares equal to iface.
 * NOTE(review): the remaining parameters and the return statements are not
 * visible here; presumably the matching entry, or NULL, is returned. */
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
81 /* Walk the known interfaces looking for a matching name */
82 for (i=ctdb->ifaces;i;i=i->next) {
83 if (strcmp(i->name, iface) == 0) {
/* Pick an interface for a vnn from the names listed in vnn->ifaces.
 * Each name is resolved with ctdb_find_iface and candidates are compared
 * by their references count.
 * NOTE(review): any filtering between the lookup and the comparison, and
 * the body of the comparison branch, are not visible here; presumably a
 * candidate with fewer references becomes the new best. */
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
95 struct ctdb_iface *cur = NULL;
96 struct ctdb_iface *best = NULL;
98 for (i=0; vnn->ifaces[i]; i++) {
100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
114 if (cur->references < best->references) {
/* Assign an interface to vnn.
 * Visible steps: an info log for the case where the address is still
 * assigned to an interface, selection of a candidate through
 * ctdb_vnn_best_iface, an error log when no interface can be assigned,
 * and, on the assignment path, vnn->pnn being set to ctdb->pnn followed
 * by an info log of the new interface.
 * NOTE(review): the guarding conditions, the reference count update and
 * the return values are not visible here. */
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124 struct ctdb_vnn *vnn)
126 struct ctdb_iface *best = NULL;
129 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130 "still assigned to iface '%s'\n",
131 ctdb_addr_to_str(&vnn->public_address),
132 ctdb_vnn_iface_string(vnn)));
136 best = ctdb_vnn_best_iface(ctdb, vnn);
138 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139 "cannot assign to iface any iface\n",
140 ctdb_addr_to_str(&vnn->public_address)));
146 vnn->pnn = ctdb->pnn;
148 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149 "now assigned to iface '%s' refs[%d]\n",
150 ctdb_addr_to_str(&vnn->public_address),
151 ctdb_vnn_iface_string(vnn),
/* Detach vnn from its current interface.
 * The visible code logs the old interface and its reference count,
 * decrements vnn->iface->references, and has a branch for the case where
 * vnn->pnn equals ctdb->pnn.
 * NOTE(review): the guard around the decrement and the body of the pnn
 * branch are not visible here. */
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157 struct ctdb_vnn *vnn)
159 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160 "now unassigned (old iface '%s' refs[%d])\n",
161 ctdb_addr_to_str(&vnn->public_address),
162 ctdb_vnn_iface_string(vnn),
163 vnn->iface?vnn->iface->references:0));
165 vnn->iface->references--;
168 if (vnn->pnn == ctdb->pnn) {
/* Report whether vnn has a usable interface.
 * Visible checks: the currently assigned interface exists and has link_up
 * set; in addition there is a loop that looks up every name in
 * vnn->ifaces with ctdb_find_iface.
 * NOTE(review): the test applied to each looked-up interface and the
 * return values are not visible here. */
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174 struct ctdb_vnn *vnn)
178 if (vnn->iface && vnn->iface->link_up) {
182 for (i=0; vnn->ifaces[i]; i++) {
183 struct ctdb_iface *cur;
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* State handed to ctdb_control_send_arp as its private_data.
 * Members visible here: the ctdb context, the array of tcp connections
 * that get a tickle ack, and the vnn being announced.
 * NOTE(review): further members (addr and count are used by the timer
 * handler) are declared on lines not visible here. */
198 struct ctdb_takeover_arp {
199 struct ctdb_context *ctdb;
202 struct ctdb_tcp_array *tcparray;
203 struct ctdb_vnn *vnn;
/* Doubly linked list element (prev/next) wrapping one
 * struct ctdb_tcp_connection. */
208 lists of tcp endpoints
210 struct ctdb_tcp_list {
211 struct ctdb_tcp_list *prev, *next;
212 struct ctdb_tcp_connection connection;
/* Doubly linked list element (prev/next) with a pointer to the ctdb
 * context. release_kill_clients walks such a list through
 * ctdb->client_ip_list and reads an addr member from each element.
 * NOTE(review): the remaining members are not visible here. */
216 list of clients to kill on IP release
218 struct ctdb_client_ip {
219 struct ctdb_client_ip *prev, *next;
220 struct ctdb_context *ctdb;
/* Timed event handler that announces a public address.
 * private_data is a struct ctdb_takeover_arp. The handler sends an ARP for
 * arp->addr on the interface of arp->vnn, then walks arp->tcparray and
 * sends a tcp tickle ack for every stored connection, logging failures of
 * either call. It compares arp->count with CTDB_ARP_REPEAT and re-arms
 * itself on arp->vnn->takeover_ctx using
 * timeval_current_ofs(CTDB_ARP_INTERVAL, 100000).
 * NOTE(review): the count increment, the action taken when the repeat
 * limit is reached and any NULL check on tcparray are not visible here. */
227 send a gratuitous arp
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
230 struct timeval t, void *private_data)
232 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
233 struct ctdb_takeover_arp);
235 struct ctdb_tcp_array *tcparray;
236 const char *iface = ctdb_vnn_iface_string(arp->vnn);
238 ret = ctdb_sys_send_arp(&arp->addr, iface);
240 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241 iface, strerror(errno)));
244 tcparray = arp->tcparray;
246 for (i=0;i<tcparray->num;i++) {
247 struct ctdb_tcp_connection *tcon;
249 tcon = &tcparray->connections[i];
250 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
252 ctdb_addr_to_str(&tcon->src_addr),
253 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254 ret = ctdb_sys_send_tcp(
259 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260 ctdb_addr_to_str(&tcon->src_addr)));
267 if (arp->count == CTDB_ARP_REPEAT) {
272 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
273 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
274 ctdb_control_send_arp, arp);
/* Start announcing vnn on its interface.
 * Creates vnn->takeover_ctx when it is not set, allocates a zeroed
 * struct ctdb_takeover_arp under it and copies the public address into it.
 * The tcp connection array of the vnn is moved to the arp state with
 * talloc_steal, vnn->tcp_array is cleared and tcp_update_needed is set.
 * Finally ctdb_control_send_arp is scheduled with a zero timeout.
 * NOTE(review): allocation failure handling, whether the steal is
 * conditional on a non-NULL array, and the return values are not visible
 * here. */
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278 struct ctdb_vnn *vnn)
280 struct ctdb_takeover_arp *arp;
281 struct ctdb_tcp_array *tcparray;
283 if (!vnn->takeover_ctx) {
284 vnn->takeover_ctx = talloc_new(vnn);
285 if (!vnn->takeover_ctx) {
290 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
296 arp->addr = vnn->public_address;
299 tcparray = vnn->tcp_array;
301 /* add all of the known tcp connections for this IP to the
302 list of tcp connections to send tickle acks for */
303 arp->tcparray = talloc_steal(arp, tcparray);
305 vnn->tcp_array = NULL;
306 vnn->tcp_update_needed = true;
309 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310 timeval_zero(), ctdb_control_send_arp, arp);
/* State passed to release_ip_callback: the pending control request, the
 * address being released and the vnn it belongs to. */
315 struct takeover_callback_state {
316 struct ctdb_req_control *c;
317 ctdb_sock_addr *addr;
318 struct ctdb_vnn *vnn;
/* State passed to ctdb_do_takeip_callback: the pending control request and
 * the vnn being taken over. */
321 struct ctdb_do_takeip_state {
322 struct ctdb_req_control *c;
323 struct ctdb_vnn *vnn;
/* Completion handler for the takeip event script.
 * private_data is a struct ctdb_do_takeip_state. The visible failure
 * handling logs the error, replies to the control with the script status
 * and sets NODE_FLAGS_UNHEALTHY on the local node. Otherwise the address
 * is announced with ctdb_announce_vnn_iface (a failure there is answered
 * with -1), a CTDB_SRVID_TAKE_IP message carrying the address string is
 * sent to the local node, and the control is answered with 0.
 * NOTE(review): the exact conditions (including the -ETIME branch body)
 * and cleanup of state are not visible here. */
327 called when takeip event finishes
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
332 struct ctdb_do_takeip_state *state =
333 talloc_get_type(private_data, struct ctdb_do_takeip_state);
338 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
340 if (status == -ETIME) {
343 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
344 ctdb_addr_to_str(&state->vnn->public_address),
345 ctdb_vnn_iface_string(state->vnn)));
346 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 node->flags |= NODE_FLAGS_UNHEALTHY;
353 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
355 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
360 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
361 data.dsize = strlen((char *)data.dptr) + 1;
362 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
364 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
367 /* the control succeeded */
368 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Begin taking over the public address described by vnn.
 * An interface is assigned with ctdb_vnn_assign_iface, a
 * struct ctdb_do_takeip_state is allocated under vnn, the control request
 * c is re-parented to ctdb with talloc_steal, and an event script is
 * started with ctdb_do_takeip_callback as completion handler and the
 * interface name, address string and netmask bits as arguments.
 * NOTE(review): the remaining script arguments, the failure cleanup and
 * the return values are not visible here. */
374 take over an ip address
376 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
377 struct ctdb_req_control *c,
378 struct ctdb_vnn *vnn)
381 struct ctdb_do_takeip_state *state;
383 ret = ctdb_vnn_assign_iface(ctdb, vnn);
385 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
386 "assin a usable interface\n",
387 ctdb_addr_to_str(&vnn->public_address),
388 vnn->public_netmask_bits));
392 state = talloc(vnn, struct ctdb_do_takeip_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(ctdb, c);
398 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
399 ctdb_addr_to_str(&vnn->public_address),
400 vnn->public_netmask_bits,
401 ctdb_vnn_iface_string(vnn)));
403 ret = ctdb_event_script_callback(ctdb,
405 ctdb_do_takeip_callback,
410 ctdb_vnn_iface_string(vnn),
411 ctdb_addr_to_str(&vnn->public_address),
412 vnn->public_netmask_bits);
415 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
416 ctdb_addr_to_str(&vnn->public_address),
417 ctdb_vnn_iface_string(vnn)));
/* State passed to ctdb_do_updateip_callback: the pending control request,
 * the interface the address was on before the move (restored by the
 * callback on failure), and the vnn being moved. */
425 struct ctdb_do_updateip_state {
426 struct ctdb_req_control *c;
427 struct ctdb_iface *old;
428 struct ctdb_vnn *vnn;
/* Completion handler for the updateip event script.
 * private_data is a struct ctdb_do_updateip_state. The visible failure
 * handling logs the error, unassigns the interface, points vnn->iface
 * back at state->old with its reference count incremented, and replies
 * to the control with the script status. Otherwise the address is
 * announced with ctdb_announce_vnn_iface (a failure there is answered
 * with -1) and the control is answered with 0.
 * NOTE(review): the exact conditions (including the -ETIME branch body)
 * and cleanup of state are not visible here. */
432 called when updateip event finishes
434 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
437 struct ctdb_do_updateip_state *state =
438 talloc_get_type(private_data, struct ctdb_do_updateip_state);
442 if (status == -ETIME) {
445 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
446 ctdb_addr_to_str(&state->vnn->public_address),
448 ctdb_vnn_iface_string(state->vnn)));
451 * All we can do is reset the old interface
452 * and let the next run fix it
454 ctdb_vnn_unassign_iface(ctdb, state->vnn);
455 state->vnn->iface = state->old;
456 state->vnn->iface->references++;
458 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
463 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
465 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
470 /* the control succeeded */
471 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Move the public address of vnn to a different local interface.
 * The current interface is remembered in old, the vnn is unassigned and
 * then assigned again with ctdb_vnn_assign_iface. When the old and new
 * interface names are both non-NULL and equal, the control is answered
 * with 0 and no event script is needed. Otherwise a
 * struct ctdb_do_updateip_state is allocated under vnn, the control
 * request c is re-parented to ctdb, and the CTDB_EVENT_UPDATE_IP script
 * is started with ctdb_do_updateip_callback as completion handler.
 * NOTE(review): old is dereferenced (old->name) without a visible NULL
 * check; failure cleanup and return values are not visible here. */
477 update (move) an ip address
479 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
480 struct ctdb_req_control *c,
481 struct ctdb_vnn *vnn)
484 struct ctdb_do_updateip_state *state;
485 struct ctdb_iface *old = vnn->iface;
488 ctdb_vnn_unassign_iface(ctdb, vnn);
489 ret = ctdb_vnn_assign_iface(ctdb, vnn);
491 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
492 "assin a usable interface (old iface '%s')\n",
493 ctdb_addr_to_str(&vnn->public_address),
494 vnn->public_netmask_bits,
499 new_name = ctdb_vnn_iface_string(vnn);
500 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
501 /* A benign update from one interface onto itself.
502 * no need to run the eventscripts in this case, just return
505 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
509 state = talloc(vnn, struct ctdb_do_updateip_state);
510 CTDB_NO_MEMORY(ctdb, state);
512 state->c = talloc_steal(ctdb, c);
516 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
517 "interface %s to %s\n",
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits,
523 ret = ctdb_event_script_callback(ctdb,
525 ctdb_do_updateip_callback,
528 CTDB_EVENT_UPDATE_IP,
532 ctdb_addr_to_str(&vnn->public_address),
533 vnn->public_netmask_bits);
535 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
536 ctdb_addr_to_str(&vnn->public_address),
537 old->name, new_name));
// Walk the ctdb->vnn list and compare each public_address with addr using
// ctdb_same_ip.
// NOTE(review): the function returns a struct ctdb_vnn pointer, so the -1
// mentioned in the older description below cannot be literal; presumably
// NULL is returned when no entry matches. The return statements are not
// visible here.
546 Find the vnn of the node that has a public ip address
547 returns -1 if the address is not known as a public address
549 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
551 struct ctdb_vnn *vnn;
553 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
554 if (ctdb_same_ip(&vnn->public_address, addr)) {
// Control handler asking this node to host a public address.
// indata carries a struct ctdb_public_ip. The visible checks are, in order:
// the request pnn must equal ctdb->pnn; the address must be a known public
// address; a best interface must exist; then several consistency cases
// between the kernel state (have_ip), vnn->iface and vnn->pnn are logged.
// Depending on the do_takeip and do_updateip flags the work is delegated
// to ctdb_do_takeip or ctdb_do_updateip; a request for an address that is
// already held is only logged.
// NOTE(review): the bodies that set the flags, the return values and the
// handling of async_reply are not visible here.
563 take over an ip address
565 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
566 struct ctdb_req_control *c,
571 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
572 struct ctdb_vnn *vnn;
573 bool have_ip = false;
574 bool do_updateip = false;
575 bool do_takeip = false;
576 struct ctdb_iface *best_iface = NULL;
578 if (pip->pnn != ctdb->pnn) {
579 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
580 "with pnn %d, but we're node %d\n",
581 ctdb_addr_to_str(&pip->addr),
582 pip->pnn, ctdb->pnn));
586 /* update our vnn list */
587 vnn = find_public_ip_vnn(ctdb, &pip->addr);
589 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
590 ctdb_addr_to_str(&pip->addr)));
594 have_ip = ctdb_sys_have_ip(&pip->addr);
595 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
596 if (best_iface == NULL) {
597 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
598 "a usable interface (old %s, have_ip %d)\n",
599 ctdb_addr_to_str(&vnn->public_address),
600 vnn->public_netmask_bits,
601 ctdb_vnn_iface_string(vnn),
606 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
607 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
611 if (vnn->iface == NULL && have_ip) {
612 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
613 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
614 ctdb_addr_to_str(&vnn->public_address)));
618 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
619 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
620 "and we have it on iface[%s], but it was assigned to node %d"
621 "and we are node %d, banning ourself\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
628 if (vnn->pnn == -1 && have_ip) {
629 vnn->pnn = ctdb->pnn;
630 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
631 "and we already have it on iface[%s], update local daemon\n",
632 ctdb_addr_to_str(&vnn->public_address),
633 ctdb_vnn_iface_string(vnn)));
638 if (vnn->iface->link_up) {
639 /* only move when the rebalance gains something */
640 if (vnn->iface->references > (best_iface->references + 1)) {
643 } else if (vnn->iface != best_iface) {
650 ctdb_vnn_unassign_iface(ctdb, vnn);
657 ret = ctdb_do_takeip(ctdb, c, vnn);
661 } else if (do_updateip) {
662 ret = ctdb_do_updateip(ctdb, c, vnn);
668 * The interface is up and the kernel known the ip
671 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
672 ctdb_addr_to_str(&pip->addr),
673 vnn->public_netmask_bits,
674 ctdb_vnn_iface_string(vnn)));
678 /* tell ctdb_control.c that we will be replying asynchronously */
/* Compatibility entry point for the old v4 style takeover request.
 * A zeroed struct ctdb_public_ip is allocated under c, the incoming
 * payload is copied over it, and the request is forwarded to
 * ctdb_control_takeover_ip.
 * NOTE(review): the memcpy length is indata.dsize while the destination
 * is sizeof(struct ctdb_public_ip); no bound check is visible here, and
 * the layout conversion between the old and new structures (if any) is
 * on lines not shown. */
685 takeover an ip address old v4 style
687 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
688 struct ctdb_req_control *c,
694 data.dsize = sizeof(struct ctdb_public_ip);
695 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
696 CTDB_NO_MEMORY(ctdb, data.dptr);
698 memcpy(data.dptr, indata.dptr, indata.dsize);
699 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
/* Send SIGKILL to client processes registered for a released address.
 * Walks ctdb->client_ip_list; for entries whose address matches addr the
 * client is looked up with ctdb_reqid_find and, when its pid is non-zero,
 * killed with SIGKILL.
 * NOTE(review): how tmp_addr is filled in and whether the client lookup
 * result is checked for NULL are not visible here. */
703 kill any clients that are registered with a IP that is being released
705 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
707 struct ctdb_client_ip *ip;
709 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
710 ctdb_addr_to_str(addr)));
712 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
713 ctdb_sock_addr tmp_addr;
716 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
718 ctdb_addr_to_str(&ip->addr)));
720 if (ctdb_same_ip(&tmp_addr, addr)) {
721 struct ctdb_client *client = ctdb_reqid_find(ctdb,
724 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
726 ctdb_addr_to_str(&ip->addr),
729 if (client->pid != 0) {
730 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
731 (unsigned)client->pid,
732 ctdb_addr_to_str(addr),
734 kill(client->pid, SIGKILL);
/* Completion handler for the releaseip event script.
 * private_data is a struct takeover_callback_state. A
 * CTDB_SRVID_RELEASE_IP message carrying a talloc copy of the address
 * string is sent to the local node, clients registered for the address
 * are killed, the vnn is unassigned from its interface and the control is
 * answered with 0.
 * NOTE(review): the body of the -ETIME branch and the release of state
 * are not visible here. */
741 called when releaseip event finishes
743 static void release_ip_callback(struct ctdb_context *ctdb, int status,
746 struct takeover_callback_state *state =
747 talloc_get_type(private_data, struct takeover_callback_state);
750 if (status == -ETIME) {
754 /* send a message to all clients of this node telling them
755 that the cluster has been reconfigured and they should
756 release any sockets on this IP */
757 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
758 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
759 data.dsize = strlen((char *)data.dptr)+1;
761 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
763 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
765 /* kill clients that have registered with this IP */
766 release_kill_clients(ctdb, state->addr);
768 ctdb_vnn_unassign_iface(ctdb, state->vnn);
770 /* the control succeeded */
771 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
/* Control handler asking this node to give up a public address.
 * indata carries a struct ctdb_public_ip. The vnn is looked up, any
 * running announcement is stopped by freeing vnn->takeover_ctx, and when
 * the kernel does not hold the address the vnn is simply unassigned.
 * Otherwise a struct takeover_callback_state is allocated under ctdb, the
 * request c and a copy of the address are attached to it, and the
 * CTDB_EVENT_RELEASE_IP script is started with release_ip_callback as
 * completion handler.
 * NOTE(review): return values, the assignment of state->vnn and the
 * handling of async_reply are not visible here. */
776 release an ip address
778 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
779 struct ctdb_req_control *c,
784 struct takeover_callback_state *state;
785 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
786 struct ctdb_vnn *vnn;
788 /* update our vnn list */
789 vnn = find_public_ip_vnn(ctdb, &pip->addr);
791 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
792 ctdb_addr_to_str(&pip->addr)));
797 /* stop any previous arps */
798 talloc_free(vnn->takeover_ctx);
799 vnn->takeover_ctx = NULL;
801 if (!ctdb_sys_have_ip(&pip->addr)) {
802 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
806 ctdb_vnn_unassign_iface(ctdb, vnn);
810 if (vnn->iface == NULL) {
811 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
812 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
813 ctdb_addr_to_str(&vnn->public_address)));
817 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn),
823 state = talloc(ctdb, struct takeover_callback_state);
824 CTDB_NO_MEMORY(ctdb, state);
826 state->c = talloc_steal(state, c);
827 state->addr = talloc(state, ctdb_sock_addr);
828 CTDB_NO_MEMORY(ctdb, state->addr);
829 *state->addr = pip->addr;
832 ret = ctdb_event_script_callback(ctdb,
833 state, release_ip_callback, state,
835 CTDB_EVENT_RELEASE_IP,
837 ctdb_vnn_iface_string(vnn),
838 ctdb_addr_to_str(&pip->addr),
839 vnn->public_netmask_bits);
841 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
842 ctdb_addr_to_str(&pip->addr),
843 ctdb_vnn_iface_string(vnn)));
848 /* tell the control that we will be reply asynchronously */
/* Compatibility entry point for the old v4 style release request.
 * A zeroed struct ctdb_public_ip is allocated under c, the incoming
 * payload is copied over it, and the request is forwarded to
 * ctdb_control_release_ip.
 * NOTE(review): the memcpy length is indata.dsize while the destination
 * is sizeof(struct ctdb_public_ip); no bound check is visible here. */
854 release an ip address old v4 style
856 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
857 struct ctdb_req_control *c,
863 data.dsize = sizeof(struct ctdb_public_ip);
864 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
865 CTDB_NO_MEMORY(ctdb, data.dptr);
867 memcpy(data.dptr, indata.dptr, indata.dsize);
868 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/* Add one public address, with its netmask and candidate interfaces, to
 * the ctdb->vnn list.
 * ifaces is a comma separated list: it is copied, split with strtok, and
 * each name is stored in the NULL terminated vnn->ifaces array. Duplicate
 * addresses are rejected with a log message. When the kernel already
 * holds the address, vnn->pnn is set to the local pnn. Every interface
 * name is registered with ctdb_add_local_iface before the vnn is linked
 * in with DLIST_ADD.
 * NOTE(review): strtok keeps static state, so this must not be interleaved
 * with another strtok sequence. The condition under which vnn->iface is
 * set and the return values are not visible here. */
872 static int ctdb_add_public_address(struct ctdb_context *ctdb,
873 ctdb_sock_addr *addr,
874 unsigned mask, const char *ifaces)
876 struct ctdb_vnn *vnn;
883 /* Verify that we do not have an entry for this ip yet */
884 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
885 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
886 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
887 ctdb_addr_to_str(addr)));
892 /* create a new vnn structure for this ip address */
893 vnn = talloc_zero(ctdb, struct ctdb_vnn);
894 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
895 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
896 tmp = talloc_strdup(vnn, ifaces);
897 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
898 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
899 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
900 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
901 vnn->ifaces[num] = talloc_strdup(vnn, iface);
902 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
906 vnn->ifaces[num] = NULL;
907 vnn->public_address = *addr;
908 vnn->public_netmask_bits = mask;
910 if (ctdb_sys_have_ip(addr)) {
911 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
912 vnn->pnn = ctdb->pnn;
915 for (i=0; vnn->ifaces[i]; i++) {
916 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
918 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
919 "for public_address[%s]\n",
920 vnn->ifaces[i], ctdb_addr_to_str(addr)));
925 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
929 DLIST_ADD(ctdb->vnn, vnn);
/* Store a talloc copy of script_dir in ctdb->event_script_dir.
 * CTDB_NO_MEMORY handles a failed copy; the success return value is not
 * visible here. */
935 setup the event script directory
937 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
939 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
940 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
/* Load the public address list from the file named by alist.
 * Trailing empty lines are dropped. For every remaining line, leading
 * blanks and tabs are skipped, empty lines are ignored, and the line is
 * split on blanks and tabs into an address token and an optional
 * interface token. When no interface is given,
 * ctdb->default_public_interface is used and its absence is logged as
 * critical. The address is parsed with parse_ip_mask and registered with
 * ctdb_add_public_address.
 * NOTE(review): comment line handling, cleanup of lines and the return
 * values are not visible here. */
945 setup the public address lists from a file
947 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
953 lines = file_lines_load(alist, &nlines, ctdb);
955 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
958 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
962 for (i=0;i<nlines;i++) {
970 while ((*line == ' ') || (*line == '\t')) {
976 if (strcmp(line, "") == 0) {
979 tok = strtok(line, " \t");
981 tok = strtok(NULL, " \t");
983 if (NULL == ctdb->default_public_interface) {
984 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
989 ifaces = ctdb->default_public_interface;
994 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
995 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
999 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
1000 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/* Configure the single public ip mode.
 * A zeroed vnn is allocated under ctdb with a two element ifaces array
 * holding a copy of iface and a NULL terminator. The address is parsed
 * with parse_ip, the interface is registered with ctdb_add_local_iface,
 * looked up again, marked link_up, and assigned to the vnn. The result is
 * stored in ctdb->single_ip_vnn.
 * NOTE(review): the remaining parameters, the failure cleanup of svnn and
 * the return values are not visible here. */
1010 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1014 struct ctdb_vnn *svnn;
1015 struct ctdb_iface *cur = NULL;
1019 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1020 CTDB_NO_MEMORY(ctdb, svnn);
1022 svnn->ifaces = talloc_array(svnn, const char *, 2);
1023 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1024 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1025 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1026 svnn->ifaces[1] = NULL;
1028 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1034 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1036 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1037 "for single_ip[%s]\n",
1039 ctdb_addr_to_str(&svnn->public_address)));
1044 /* assume the single public ip interface is initially "good" */
1045 cur = ctdb_find_iface(ctdb, iface);
1047 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1050 cur->link_up = true;
1052 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1058 ctdb->single_ip_vnn = svnn;
/* Walks the ips list and tests each entry for pnn equal to the requested
 * node. NOTE(review): the counter and the return statement are not
 * visible here; presumably the number of matching entries is returned. */
1062 /* Given a physical node, return the number of
1063 public addresses that is currently assigned to this node.
1065 static int node_ip_coverage(struct ctdb_context *ctdb,
1067 struct ctdb_public_ip_list *ips)
1071 for (;ips;ips=ips->next) {
1072 if (ips->pnn == pnn) {
// Looks for ip->addr in the available_public_ips of node pnn using
// ctdb_same_ip; a NULL list is handled by a separate branch.
// NOTE(review): the return values are not visible here. Callers in this
// file treat a non-zero result as the node not being able to serve the
// ip, so presumably 0 means it can. Confirm before relying on the name.
1080 /* Check if this is a public ip known to the node, i.e. can that
1081 node takeover this ip ?
1083 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1084 struct ctdb_public_ip_list *ip)
1086 struct ctdb_all_public_ips *public_ips;
1089 public_ips = ctdb->nodes[pnn]->available_public_ips;
1091 if (public_ips == NULL) {
1095 for (i=0;i<public_ips->num;i++) {
1096 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1097 /* yes, this node can serve this public ip */
/* Choose a node to host ip.
 * Nodes whose flags intersect mask are skipped, as are nodes for which
 * can_node_serve_ip returns non-zero. For the remaining nodes the current
 * coverage is obtained from node_ip_coverage. A warning is logged when no
 * node can take the address.
 * NOTE(review): the minimum tracking, the assignment of ip->pnn and the
 * return values are not visible here. */
1106 /* search the node lists list for a node to takeover this ip.
1107 pick the node that currently are serving the least number of ips
1108 so that the ips get spread out evenly.
1110 static int find_takeover_node(struct ctdb_context *ctdb,
1111 struct ctdb_node_map *nodemap, uint32_t mask,
1112 struct ctdb_public_ip_list *ip,
1113 struct ctdb_public_ip_list *all_ips)
1115 int pnn, min=0, num;
1119 for (i=0;i<nodemap->num;i++) {
1120 if (nodemap->nodes[i].flags & mask) {
1121 /* This node is not healthy and can not be used to serve
1127 /* verify that this node can serve this ip */
1128 if (can_node_serve_ip(ctdb, i, ip)) {
1129 /* no it could not so skip to the next node */
1133 num = node_ip_coverage(ctdb, i, all_ips);
1134 /* was this the first node we checked ? */
1146 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1147 ctdb_addr_to_str(&ip->addr)));
/* Build an IP_KEYLEN word key for an address.
 * The key lives in a function-local static array that is zeroed on every
 * call, so a result is overwritten by the next call and must be copied by
 * callers that need it longer. For the IPv4 style member only key[3] is
 * filled; for the IPv6 style member all four words are filled from
 * s6_addr32. Unknown families are logged.
 * NOTE(review): the case labels and the return statement are not visible
 * here; presumably the static array is returned. */
1157 static uint32_t *ip_key(ctdb_sock_addr *ip)
1159 static uint32_t key[IP_KEYLEN];
1161 bzero(key, sizeof(key));
1163 switch (ip->sa.sa_family) {
1165 key[3] = htonl(ip->ip.sin_addr.s_addr);
1168 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1169 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1170 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1171 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1174 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* Merge callback taking the entry being inserted (parm) and the entry
 * already stored (data). When the new entry has no node assigned (pnn is
 * -1) it inherits the pnn of the previous entry.
 * NOTE(review): the return values, including the one for a NULL previous
 * entry, are not visible here. */
1181 static void *add_ip_callback(void *parm, void *data)
1183 struct ctdb_public_ip_list *this_ip = parm;
1184 struct ctdb_public_ip_list *prev_ip = data;
1186 if (prev_ip == NULL) {
1189 if (this_ip->pnn == -1) {
1190 this_ip->pnn = prev_ip->pnn;
/* Traversal callback that links data in front of the list whose head
 * pointer is passed in param.
 * NOTE(review): the update of the head pointer is not visible here. */
1196 void getips_count_callback(void *param, void *data)
1198 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1199 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1201 new_ip->next = *ip_list;
/* Build one list of public addresses from the known_public_ips of all
 * nodes.
 * Any previous ctdb->ip_tree is freed and a new tree is created. Nodes
 * flagged NODE_FLAGS_DELETED or without a known list are handled by
 * separate branches. Each address is copied into a list entry allocated
 * under the tree and inserted with a key from ip_key. The list is then
 * produced by traversing the tree with getips_count_callback, so the
 * entries are owned by ctdb->ip_tree.
 * NOTE(review): the insert callback, the initialisation of ip_list and
 * the return statement are not visible here. */
1205 static struct ctdb_public_ip_list *
1206 create_merged_ip_list(struct ctdb_context *ctdb)
1209 struct ctdb_public_ip_list *ip_list;
1210 struct ctdb_all_public_ips *public_ips;
1212 if (ctdb->ip_tree != NULL) {
1213 talloc_free(ctdb->ip_tree);
1214 ctdb->ip_tree = NULL;
1216 ctdb->ip_tree = trbt_create(ctdb, 0);
1218 for (i=0;i<ctdb->num_nodes;i++) {
1219 public_ips = ctdb->nodes[i]->known_public_ips;
1221 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1225 /* there were no public ips for this node */
1226 if (public_ips == NULL) {
1230 for (j=0;j<public_ips->num;j++) {
1231 struct ctdb_public_ip_list *tmp_ip;
1233 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1234 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1235 tmp_ip->pnn = public_ips->ips[j].pnn;
1236 tmp_ip->addr = public_ips->ips[j].addr;
1237 tmp_ip->next = NULL;
1239 trbt_insertarray32_callback(ctdb->ip_tree,
1240 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1247 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
/* Compares two addresses word by word on their ip_key form.
 * The key of ip1 is copied into a local array with memcpy because ip_key
 * returns storage that the next ip_key call overwrites.
 * NOTE(review): the bit test uses (1 << 31) on a plain int constant;
 * an unsigned constant would avoid shifting into the sign bit. The
 * accumulation of distance and the return are not visible here. */
1253 * This is the length of the longtest common prefix between the IPs.
1254 * It is calculated by XOR-ing the 2 IPs together and counting the
1255 * number of leading zeroes. The implementation means that all
1256 * addresses end up being 128 bits long.
1257 * Not static, so we can easily link it into a unit test.
1259 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1260 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1261 * lots of nodes and IP addresses?
1263 uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1265 uint32_t ip1_k[IP_KEYLEN];
1270 uint32_t distance = 0;
1272 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1274 for (i=0; i<IP_KEYLEN; i++) {
1275 x = ip1_k[i] ^ t[i];
1279 /* Count number of leading zeroes.
1280 * FIXME? This could be optimised...
1282 while ((x & (1 << 31)) == 0) {
// Sums the squared ip_distance between ip and every entry of ips whose pnn
// matches the requested node. The entry whose addr member is the same
// object as ip (pointer comparison) is handled by a separate branch.
// NOTE(review): the bodies of the two filter branches and the return are
// not visible here; presumably both skip the entry and sum is returned.
1292 /* Calculate the IP distance for the given IP relative to IPs on the
1293 given node. The ips argument is generally the all_ips variable
1294 used in the main part of the algorithm.
1295 * Not static, so we can easily link it into a unit test.
1297 uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1298 struct ctdb_public_ip_list *ips,
1301 struct ctdb_public_ip_list *t;
1306 for (t=ips; t != NULL; t=t->next) {
1307 if (t->pnn != pnn) {
1311 /* Optimisation: We never calculate the distance
1312 * between an address and itself. This allows us to
1313 * calculate the effect of removing an address from a
1314 * node by simply calculating the distance between
1315 * that address and all of the existing addresses.
1316 * Moreover, we assume that we're only ever dealing
1317 * with addresses from all_ips so we can identify an
1318 * address via a pointer rather than doing a more
1319 * expensive address comparison. */
1320 if (&(t->addr) == ip) {
1324 d = ip_distance(ip, &(t->addr));
1325 sum += d * d; /* Cheaper than pulling in math.h :-) */
/* For every entry of all_ips that belongs to node pnn, adds the
 * ip_distance_2_sum of that address against the entries that follow it in
 * the list, so each pair is visited once.
 * NOTE(review): the body of the pnn filter branch and the return are not
 * visible here; presumably imbalance is returned. */
1331 /* Return the LCP2 imbalance metric for addresses currently assigned
1333 * Not static, so we can easily link it into a unit test.
1335 uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1337 struct ctdb_public_ip_list *t;
1339 uint32_t imbalance = 0;
1341 for (t=all_ips; t!=NULL; t=t->next) {
1342 if (t->pnn != pnn) {
1345 /* Pass the rest of the IPs rather than the whole
1348 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
// For every entry of all_ips with pnn equal to -1, find_takeover_node is
// called; a non-zero result is logged as a warning.
// NOTE(review): one parameter line is not visible here.
1354 /* Allocate any unassigned IPs just by looping through the IPs and
1355 * finding the best node for each.
1356 * Not static, so we can easily link it into a unit test.
1358 void basic_allocate_unassigned(struct ctdb_context *ctdb,
1359 struct ctdb_node_map *nodemap,
1361 struct ctdb_public_ip_list *all_ips)
1363 struct ctdb_public_ip_list *tmp_ip;
1365 /* loop over all ip's and find a physical node to cover for
1368 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1369 if (tmp_ip->pnn == -1) {
1370 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1371 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1372 ctdb_addr_to_str(&tmp_ip->addr)));
// For each assigned address the nodes that may serve it are scanned
// (skipping nodes whose flags intersect mask and nodes for which
// can_node_serve_ip returns non-zero) to find the highest and lowest
// coverage. Rebalancing is not attempted when the
// deterministic_public_ips tunable is 1. When the spread exceeds one and
// the retry budget of num_ips + 5 is not used up, an address held by the
// busiest node is selected.
// NOTE(review): what is done with the selected address, the update of
// retries and the return value are not visible here.
1378 /* Basic non-deterministic rebalancing algorithm.
1379 * Not static, so we can easily link it into a unit test.
1381 bool basic_failback(struct ctdb_context *ctdb,
1382 struct ctdb_node_map *nodemap,
1384 struct ctdb_public_ip_list *all_ips,
1389 int maxnode, maxnum=0, minnode, minnum=0, num;
1390 struct ctdb_public_ip_list *tmp_ip;
1392 /* for each ip address, loop over all nodes that can serve
1393 this ip and make sure that the difference between the node
1394 serving the most and the node serving the least ip's are
1397 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1398 if (tmp_ip->pnn == -1) {
1402 /* Get the highest and lowest number of ips served by any
1403 valid node which can serve this ip.
1407 for (i=0;i<nodemap->num;i++) {
1408 if (nodemap->nodes[i].flags & mask) {
1412 /* only check nodes that can actually serve this ip */
1413 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1414 /* no it could not so skip to the next node */
1418 num = node_ip_coverage(ctdb, i, all_ips);
1419 if (maxnode == -1) {
1428 if (minnode == -1) {
1438 if (maxnode == -1) {
1439 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1440 ctdb_addr_to_str(&tmp_ip->addr)));
1445 /* If we want deterministic IPs then do not try to reallocate
1446 them to spread out the load.
1448 if (1 == ctdb->tunable.deterministic_public_ips) {
1452 /* if the spread between the smallest and largest coverage by
1453 a node is >=2 we steal one of the ips from the node with
1454 most coverage to even things out a bit.
1455 try to do this a limited number of times since we do not
1456 want to spend too much time balancing the ip coverage.
1458 if ( (maxnum > minnum+1)
1459 && (*retries < (num_ips + 5)) ){
1460 struct ctdb_public_ip_list *tmp;
1462 /* mark one of maxnode's vnn's as unassigned and try
1465 for (tmp=all_ips;tmp;tmp=tmp->next) {
1466 if (tmp->pnn == maxnode) {
// Allocates two arrays of nodemap->num entries under tmp_ctx and returns
// them through lcp2_imbalances and newly_healthy. Each node gets its
// lcp2_imbalance value, and newly_healthy starts as true when none of the
// mask flags are set on the node. Nodes that already hold an address are
// then cleared from newly_healthy.
1478 /* Do necessary LCP2 initialisation. Bury it in a function here so
1479 * that we can unit test it.
1480 * Not static, so we can easily link it into a unit test.
1482 void lcp2_init(struct ctdb_context * tmp_ctx,
1483 struct ctdb_node_map * nodemap,
1485 struct ctdb_public_ip_list *all_ips,
1486 uint32_t **lcp2_imbalances,
1487 bool **newly_healthy)
1490 struct ctdb_public_ip_list *tmp_ip;
1492 *newly_healthy = talloc_array(tmp_ctx, bool, nodemap->num);
1493 CTDB_NO_MEMORY_FATAL(tmp_ctx, *newly_healthy);
1494 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1495 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1497 for (i=0;i<nodemap->num;i++) {
1498 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1499 /* First step: is the node "healthy"? */
1500 (*newly_healthy)[i] = ! (bool)(nodemap->nodes[i].flags & mask);
1503 /* 2nd step: if a node has IPs assigned then it must have been
1504 * healthy before, so we remove it from consideration... */
1505 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1506 if (tmp_ip->pnn != -1) {
1507 (*newly_healthy)[tmp_ip->pnn] = false;
/*
 * lcp2_allocate_unassigned: hand out addresses whose pnn is -1, one per
 * pass of the outer loop.
 *
 * In each pass every unassigned address is tried against every node;
 * nodes are skipped when can_node_serve_ip() returns non-zero or when
 * the node has any of the flags in mask set.  The cost of a candidate is
 * dstdsum = ip_distance_2_sum(addr, all_ips, dstnode) and the resulting
 * node imbalance is lcp2_imbalances[dstnode] + dstdsum.  The candidate
 * with the smallest dstdsum wins (first one found when minnode is still
 * -1, afterwards only a strictly smaller dstdsum replaces it).
 *
 * When a winner exists its pnn is set to minnode and
 * lcp2_imbalances[minnode] is updated to minimbl.  The outer loop runs
 * while addresses remain unassigned and should_loop is true; should_loop
 * is cleared at the top of every pass (presumably re-armed when an
 * assignment is made - confirm).  Addresses still unassigned afterwards
 * are reported at DEBUG_WARNING.
 */
1512 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1513 * the IP/node combination that will cost the least.
1514 * Not static, so we can easily link it into a unit test.
1516 void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1517 struct ctdb_node_map *nodemap,
1519 struct ctdb_public_ip_list *all_ips,
1520 uint32_t *lcp2_imbalances)
1522 struct ctdb_public_ip_list *tmp_ip;
1526 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1527 struct ctdb_public_ip_list *minip;
1529 bool should_loop = true;
1530 bool have_unassigned = true;
1532 while (have_unassigned && should_loop) {
1533 should_loop = false;
1535 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1536 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1542 /* loop over each unassigned ip. */
1543 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1544 if (tmp_ip->pnn != -1) {
1548 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1549 /* only check nodes that can actually serve this ip */
1550 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1551 /* no it couldnt so skip to the next node */
1554 if (nodemap->nodes[dstnode].flags & mask) {
1558 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1559 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1560 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1561 ctdb_addr_to_str(&(tmp_ip->addr)),
1563 dstimbl - lcp2_imbalances[dstnode]));
1566 if ((minnode == -1) || (dstdsum < mindsum)) {
1576 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1578 /* If we found one then assign it to the given node. */
1579 if (minnode != -1) {
1580 minip->pnn = minnode;
1581 lcp2_imbalances[minnode] = minimbl;
1582 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1583 ctdb_addr_to_str(&(minip->addr)),
1588 /* There might be a better way but at least this is clear. */
1589 have_unassigned = false;
1590 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1591 if (tmp_ip->pnn == -1) {
1592 have_unassigned = true;
1597 /* We know if we have an unassigned addresses so we might as
1600 if (have_unassigned) {
1601 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1602 if (tmp_ip->pnn == -1) {
1603 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1604 ctdb_addr_to_str(&tmp_ip->addr)));
/*
 * lcp2_failback_candidate: try to move one address away from srcnode.
 *
 * candimbl is used as the current imbalance of srcnode.  For each
 * address held by srcnode:
 *   srcdsum = ip_distance_2_sum(addr, all_ips, srcnode)
 *   srcimbl = candimbl - srcdsum        (source imbalance after removal)
 * and for each destination with newly_healthy[dstnode] set that is not
 * rejected by can_node_serve_ip():
 *   dstdsum = ip_distance_2_sum(addr, all_ips, dstnode)
 *   dstimbl = lcp2_imbalances[dstnode] + dstdsum
 *
 * A move qualifies only when dstimbl < candimbl and dstdsum < srcdsum;
 * among qualifying moves the one with the smallest srcimbl + dstimbl is
 * remembered in minip / mindstnode / minsrcimbl / mindstimbl.
 *
 * When a move was found it is logged at DEBUG_INFO, both entries of
 * lcp2_imbalances[] are updated and minip->pnn becomes mindstnode.
 * The function returns bool (presumably true when a move was made -
 * confirm against the return statements).
 */
1610 /* LCP2 algorithm for rebalancing the cluster. Given a candidate node
1611 * to move IPs from, determines the best IP/destination node
1612 * combination to move from the source node.
1614 * Not static, so we can easily link it into a unit test.
1616 bool lcp2_failback_candidate(struct ctdb_context *ctdb,
1617 struct ctdb_node_map *nodemap,
1618 struct ctdb_public_ip_list *all_ips,
1621 uint32_t *lcp2_imbalances,
1622 bool *newly_healthy)
1624 int dstnode, mindstnode;
1625 uint32_t srcimbl, srcdsum, dstimbl, dstdsum;
1626 uint32_t minsrcimbl, mindstimbl;
1627 struct ctdb_public_ip_list *minip;
1628 struct ctdb_public_ip_list *tmp_ip;
1630 /* Find an IP and destination node that best reduces imbalance. */
1636 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1637 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, candimbl));
1639 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1640 /* Only consider addresses on srcnode. */
1641 if (tmp_ip->pnn != srcnode) {
1645 /* What is this IP address costing the source node? */
1646 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
1647 srcimbl = candimbl - srcdsum;
1649 /* Consider what this IP address would cost each potential
1650 * destination node. Destination nodes are limited to
1651 * those that are newly healthy, since we don't want
1652 * to do gratuitous failover of IPs just to make minor
1653 * balance improvements.
1655 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1656 if (! newly_healthy[dstnode]) {
1659 /* only check nodes that can actually serve this ip */
1660 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1661 /* no it couldnt so skip to the next node */
1665 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1666 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1667 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1668 srcnode, srcimbl - lcp2_imbalances[srcnode],
1669 ctdb_addr_to_str(&(tmp_ip->addr)),
1670 dstnode, dstimbl - lcp2_imbalances[dstnode]));
1672 if ((dstimbl < candimbl) && (dstdsum < srcdsum) && \
1673 ((mindstnode == -1) || \
1674 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1677 minsrcimbl = srcimbl;
1678 mindstnode = dstnode;
1679 mindstimbl = dstimbl;
1683 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1685 if (mindstnode != -1) {
1686 /* We found a move that makes things better... */
1687 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1688 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1689 ctdb_addr_to_str(&(minip->addr)),
1690 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
/* NOTE(review): srcimbl holds the value computed for the last address the
 * loop above examined on srcnode, whereas the chosen move recorded its
 * source imbalance in minsrcimbl (which is also what the log message just
 * above reports).  Storing srcimbl here looks like a bug - confirm
 * whether minsrcimbl was intended. */
1693 lcp2_imbalances[srcnode] = srcimbl;
1694 lcp2_imbalances[mindstnode] = mindstimbl;
1695 minip->pnn = mindstnode;
/*
 * Element type of the array that lcp2_failback() builds and sorts with
 * qsort().  Its imbalance member is filled from lcp2_imbalances[] and is
 * the key compared by lcp2_cmp_imbalance_pnn().
 */
1704 struct lcp2_imbalance_pnn {
/*
 * qsort() comparison callback for arrays of struct lcp2_imbalance_pnn.
 * Only the imbalance members of the two elements are compared, with
 * separate branches for "a greater than b" and "a equal to b".
 * NOTE(review): the value returned by each branch determines the sort
 * direction - confirm it against the return statements before relying on
 * a particular order.
 */
1709 int lcp2_cmp_imbalance_pnn(const void * a, const void * b)
1711 const struct lcp2_imbalance_pnn * lipa = (const struct lcp2_imbalance_pnn *) a;
1712 const struct lcp2_imbalance_pnn * lipb = (const struct lcp2_imbalance_pnn *) b;
1714 if (lipa->imbalance > lipb->imbalance) {
1716 } else if (lipa->imbalance == lipb->imbalance) {
/*
 * lcp2_failback: driver for LCP2 rebalancing.
 *
 *  1. Count the entries set in newly_healthy[]; there is a dedicated
 *     branch for the case where none are set.
 *  2. Copy lcp2_imbalances[] into a talloc array of
 *     struct lcp2_imbalance_pnn (nodemap->num entries) and sort it with
 *     qsort() using lcp2_cmp_imbalance_pnn.
 *  3. Walk the sorted array; an entry with imbalance == 0 is treated
 *     specially (see the in-line comment), otherwise
 *     lcp2_failback_candidate() is called for the entry.
 *
 * NOTE(review): the talloc_array() result is dereferenced in the very
 * next loop without a NULL check, unlike other allocations in this file
 * which use CTDB_NO_MEMORY_FATAL.  The array is parented to ctdb, so also
 * confirm that it is released before the function returns.
 */
1723 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1724 * node with the highest LCP2 imbalance, and then determines the best
1725 * IP/destination node combination to move from the source node.
1727 * Not static, so we can easily link it into a unit test.
1729 bool lcp2_failback(struct ctdb_context *ctdb,
1730 struct ctdb_node_map *nodemap,
1732 struct ctdb_public_ip_list *all_ips,
1733 uint32_t *lcp2_imbalances,
1734 bool *newly_healthy)
1736 int i, num_newly_healthy;
1737 struct lcp2_imbalance_pnn * lips;
1740 /* It is only worth continuing if we have suitable target
1741 * nodes to transfer IPs to. This check is much cheaper than
1744 num_newly_healthy = 0;
1745 for (i = 0; i < nodemap->num; i++) {
1746 if (newly_healthy[i]) {
1747 num_newly_healthy++;
1750 if (num_newly_healthy == 0) {
1754 /* Put the imbalances and nodes into an array, sort them and
1755 * iterate through candidates. Usually the 1st one will be
1756 * used, so this doesn't cost much...
1758 lips = talloc_array(ctdb, struct lcp2_imbalance_pnn, nodemap->num);
1759 for (i = 0; i < nodemap->num; i++) {
1760 lips[i].imbalance = lcp2_imbalances[i];
1763 qsort(lips, nodemap->num, sizeof(struct lcp2_imbalance_pnn),
1764 lcp2_cmp_imbalance_pnn);
1767 for (i = 0; i < nodemap->num; i++) {
1768 /* This means that all nodes had 0 or 1 addresses, so
1769 * can't be imbalanced.
1771 if (lips[i].imbalance == 0) {
1775 if (lcp2_failback_candidate(ctdb,
/*
 * ctdb_takeover_run_core: compute the owner (->pnn) of every public
 * address without sending anything to other nodes.
 *
 * Steps, in order:
 *  - pick mask: NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED when at least one
 *    node has neither flag set, otherwise NODE_FLAGS_INACTIVE only, so
 *    that disabled nodes act as a fallback;
 *  - build the cluster-wide list with create_merged_ip_list() and hand it
 *    back through *all_ips_p;
 *  - with tunable deterministic_public_ips == 1, reset every address to
 *    pnn = index % nodemap->num;
 *  - unassign addresses whose node has a masked flag set or for which
 *    can_node_serve_ip() returns non-zero;
 *  - allocate unassigned addresses with lcp2_allocate_unassigned() or
 *    basic_allocate_unassigned(), selected by the
 *    lcp2_public_ip_assignment tunable (lcp2_init() is run first in the
 *    LCP2 case);
 *  - the no_ip_failback tunable gates the rebalancing step (combining it
 *    with deterministic_public_ips is reported as an error); rebalancing
 *    uses lcp2_failback() or basic_failback().
 *
 * On completion each list entry's ->pnn is the owning node, or -1 when
 * no node can cover the address.
 */
1791 /* The calculation part of the IP allocation algorithm.
1792 * Not static, so we can easily link it into a unit test.
1794 void ctdb_takeover_run_core(struct ctdb_context *ctdb,
1795 struct ctdb_node_map *nodemap,
1796 struct ctdb_public_ip_list **all_ips_p)
1798 int i, num_healthy, retries, num_ips;
1800 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1801 uint32_t *lcp2_imbalances;
1802 bool *newly_healthy;
1804 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1806 /* Count how many completely healthy nodes we have */
1808 for (i=0;i<nodemap->num;i++) {
1809 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1814 if (num_healthy > 0) {
1815 /* We have healthy nodes, so only consider them for
1816 serving public addresses
1818 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1820 /* We didnt have any completely healthy nodes so
1821 use "disabled" nodes as a fallback
1823 mask = NODE_FLAGS_INACTIVE;
1826 /* since nodes only know about those public addresses that
1827 can be served by that particular node, no single node has
1828 a full list of all public addresses that exist in the cluster.
1829 Walk over all node structures and create a merged list of
1830 all public addresses that exist in the cluster.
1832 keep the tree of ips around as ctdb->ip_tree
1834 all_ips = create_merged_ip_list(ctdb);
1835 *all_ips_p = all_ips; /* minimal code changes */
1837 /* Count how many ips we have */
1839 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1843 /* If we want deterministic ip allocations, i.e. that the ip addresses
1844 will always be allocated the same way for a specific set of
1845 available/unavailable nodes.
1847 if (1 == ctdb->tunable.deterministic_public_ips) {
1848 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1849 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1850 tmp_ip->pnn = i%nodemap->num;
1855 /* mark all public addresses with a masked node as being served by
1858 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1859 if (tmp_ip->pnn == -1) {
1862 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1867 /* verify that the assigned nodes can serve that public ip
1868 and set it to -1 if not
1870 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1871 if (tmp_ip->pnn == -1) {
1874 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1875 /* this node can not serve this ip. */
1880 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1881 lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
1884 /* now we must redistribute all public addresses with takeover node
1885 -1 among the nodes available
1889 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1890 lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
1892 basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
1895 /* If we dont want ips to fail back after a node becomes healthy
1896 again, we wont even try to reallocat the ip addresses so that
1897 they are evenly spread out.
1898 This can NOT be used at the same time as DeterministicIPs !
1900 if (1 == ctdb->tunable.no_ip_failback) {
1901 if (1 == ctdb->tunable.deterministic_public_ips) {
1902 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1908 /* now, try to make sure the ip adresses are evenly distributed
1911 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1912 if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
1916 if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
1921 /* finished distributing the public addresses, now just send the
1922 info out to the nodes
1926 /* at this point ->pnn is the node which will own each IP
1927 or -1 if there is no node that can cover this ip
/*
 * ctdb_takeover_run: recompute the public address layout with
 * ctdb_takeover_run_core() and push it to the cluster in three phases.
 *
 *  1. Release: for every node not flagged NODE_FLAGS_DISCONNECTED, and
 *     for every address that node is not supposed to serve, send
 *     CTDB_CONTROL_RELEASE_IPv4 (AF_INET addresses) or
 *     CTDB_CONTROL_RELEASE_IP, then wait for all replies.
 *  2. Takeover: for every address with pnn != -1, send
 *     CTDB_CONTROL_TAKEOVER_IPv4 / CTDB_CONTROL_TAKEOVER_IP to the owning
 *     node, then wait for all replies.
 *  3. Run the "ipreallocated" event via CTDB_CONTROL_RUN_EVENTSCRIPTS on
 *     the nodes returned by list_of_connected_nodes().
 *
 * fail_callback and callback_data are installed on each async batch and
 * passed to the final event-script control.  Every control uses
 * TAKEOVER_TIMEOUT().  Temporary allocations hang off tmp_ctx, which is
 * freed on the failure paths shown and at the end of the function.
 * A non-zero disable_ip_failover tunable takes a separate early branch.
 */
1934 make any IP alias changes for public addresses that are necessary
1936 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
1937 client_async_callback fail_callback, void *callback_data)
1940 struct ctdb_public_ip ip;
1941 struct ctdb_public_ipv4 ipv4;
1943 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1945 struct timeval timeout;
1946 struct client_async_data *async_data;
1947 struct ctdb_client_control_state *state;
1948 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1951 * ip failover is completely disabled, just send out the
1952 * ipreallocated event.
1954 if (ctdb->tunable.disable_ip_failover != 0) {
1960 /* Do the IP reassignment calculations */
1961 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
1963 /* now tell all nodes to delete any alias that they should not
1964 have. This will be a NOOP on nodes that don't currently
1965 hold the given alias */
1966 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1967 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1969 async_data->fail_callback = fail_callback;
1970 async_data->callback_data = callback_data;
1972 for (i=0;i<nodemap->num;i++) {
1973 /* don't talk to unconnected nodes, but do talk to banned nodes */
1974 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1978 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1979 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1980 /* This node should be serving this
1981 vnn so dont tell it to release the ip
1985 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1986 ipv4.pnn = tmp_ip->pnn;
1987 ipv4.sin = tmp_ip->addr.ip;
1989 timeout = TAKEOVER_TIMEOUT();
1990 data.dsize = sizeof(ipv4);
1991 data.dptr = (uint8_t *)&ipv4;
1992 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1993 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1997 ip.pnn = tmp_ip->pnn;
1998 ip.addr = tmp_ip->addr;
2000 timeout = TAKEOVER_TIMEOUT();
2001 data.dsize = sizeof(ip);
2002 data.dptr = (uint8_t *)&ip;
2003 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2004 0, CTDB_CONTROL_RELEASE_IP, 0,
2009 if (state == NULL) {
2010 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2011 talloc_free(tmp_ctx);
2015 ctdb_client_async_add(async_data, state);
2018 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2019 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2020 talloc_free(tmp_ctx);
2023 talloc_free(async_data);
2026 /* tell all nodes to get their own IPs */
2027 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2028 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2030 async_data->fail_callback = fail_callback;
2031 async_data->callback_data = callback_data;
2033 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2034 if (tmp_ip->pnn == -1) {
2035 /* this IP won't be taken over */
2039 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2040 ipv4.pnn = tmp_ip->pnn;
2041 ipv4.sin = tmp_ip->addr.ip;
2043 timeout = TAKEOVER_TIMEOUT();
2044 data.dsize = sizeof(ipv4);
2045 data.dptr = (uint8_t *)&ipv4;
2046 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2047 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2051 ip.pnn = tmp_ip->pnn;
2052 ip.addr = tmp_ip->addr;
2054 timeout = TAKEOVER_TIMEOUT();
2055 data.dsize = sizeof(ip);
2056 data.dptr = (uint8_t *)&ip;
2057 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2058 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2062 if (state == NULL) {
2063 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2064 talloc_free(tmp_ctx);
2068 ctdb_client_async_add(async_data, state);
2070 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2071 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2072 talloc_free(tmp_ctx);
2077 /* tell all nodes to update natgw */
2078 /* send the flags update natgw on all connected nodes */
2079 data.dptr = discard_const("ipreallocated");
2080 data.dsize = strlen((char *)data.dptr) + 1;
2081 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2082 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
2083 nodes, 0, TAKEOVER_TIMEOUT(),
2085 NULL, fail_callback,
2086 callback_data) != 0) {
2087 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
2090 talloc_free(tmp_ctx);
/*
 * talloc destructor for struct ctdb_client_ip (installed with
 * talloc_set_destructor() in ctdb_control_tcp_client).  Logs the address,
 * port and client id at DEBUG_DEBUG and unlinks the entry from
 * ip->ctdb->client_ip_list.
 */
2096 destroy a ctdb_client_ip structure
2098 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2100 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2101 ctdb_addr_to_str(&ip->addr),
2102 ntohs(ip->addr.ip.sin_port),
2105 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
/*
 * ctdb_control_tcp_client: register a TCP connection held by a local
 * client so it can be tickled after an IP takeover.
 *
 * indata must be exactly sizeof(struct ctdb_control_tcp) (old IPv4-only
 * layout, converted into a zeroed struct ctdb_control_tcp_addr) or
 * sizeof(struct ctdb_control_tcp_addr); any other size is logged as
 * invalid.  Source and destination are passed through
 * ctdb_canonicalize_ip(), and the destination is looked up with
 * find_public_ip_vnn().
 *
 * Failure cases logged here: destination is not a public address (the
 * IPv4 message is suppressed for INADDR_LOOPBACK), unknown address
 * family, and a public address that this node does not hold
 * (vnn->pnn != ctdb->pnn).
 *
 * On success a ctdb_client_ip (with ctdb_client_ip_destructor) and a
 * ctdb_tcp_list entry are allocated as talloc children of the client and
 * linked into ctdb->client_ip_list and client->tcp_list, and the
 * connection is broadcast to connected nodes with CTDB_CONTROL_TCP_ADD
 * (no reply requested).
 */
2110 called by a client to inform us of a TCP connection that it is managing
2111 that should tickled with an ACK when IP takeover is done
2112 we handle both the old ipv4 style of packets as well as the new ipv4/6
2115 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2118 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2119 struct ctdb_control_tcp *old_addr = NULL;
2120 struct ctdb_control_tcp_addr new_addr;
2121 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2122 struct ctdb_tcp_list *tcp;
2123 struct ctdb_tcp_connection t;
2126 struct ctdb_client_ip *ip;
2127 struct ctdb_vnn *vnn;
2128 ctdb_sock_addr addr;
2130 switch (indata.dsize) {
2131 case sizeof(struct ctdb_control_tcp):
2132 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2133 ZERO_STRUCT(new_addr);
2134 tcp_sock = &new_addr;
2135 tcp_sock->src.ip = old_addr->src;
2136 tcp_sock->dest.ip = old_addr->dest;
2138 case sizeof(struct ctdb_control_tcp_addr):
2139 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2142 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2143 "to ctdb_control_tcp_client. size was %d but "
2144 "only allowed sizes are %lu and %lu\n",
2146 (long unsigned)sizeof(struct ctdb_control_tcp),
2147 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
2151 addr = tcp_sock->src;
2152 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2153 addr = tcp_sock->dest;
2154 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2157 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2158 vnn = find_public_ip_vnn(ctdb, &addr);
2160 switch (addr.sa.sa_family) {
2162 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2163 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2164 ctdb_addr_to_str(&addr)));
2168 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2169 ctdb_addr_to_str(&addr)));
2172 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2178 if (vnn->pnn != ctdb->pnn) {
2179 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2180 ctdb_addr_to_str(&addr),
2181 client_id, client->pid));
2182 /* failing this call will tell smbd to die */
2186 ip = talloc(client, struct ctdb_client_ip);
2187 CTDB_NO_MEMORY(ctdb, ip);
2191 ip->client_id = client_id;
2192 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2193 DLIST_ADD(ctdb->client_ip_list, ip);
2195 tcp = talloc(client, struct ctdb_tcp_list);
2196 CTDB_NO_MEMORY(ctdb, tcp);
2198 tcp->connection.src_addr = tcp_sock->src;
2199 tcp->connection.dst_addr = tcp_sock->dest;
2201 DLIST_ADD(client->tcp_list, tcp);
2203 t.src_addr = tcp_sock->src;
2204 t.dst_addr = tcp_sock->dest;
2206 data.dptr = (uint8_t *)&t;
2207 data.dsize = sizeof(t);
2209 switch (addr.sa.sa_family) {
2211 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2212 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2213 ctdb_addr_to_str(&tcp_sock->src),
2214 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2217 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2218 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2219 ctdb_addr_to_str(&tcp_sock->src),
2220 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2223 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2227 /* tell all nodes about this tcp connection */
2228 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2229 CTDB_CONTROL_TCP_ADD,
2230 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2232 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
/*
 * ctdb_tcp_find: linear search of array->connections[0 .. num-1] for an
 * entry whose source and destination both match tcp according to
 * ctdb_same_sockaddr().  Returns a pointer into the array for the first
 * match.  A NULL array is handled explicitly; callers compare the result
 * against NULL to detect "not found".
 */
2240 find a tcp address on a list
2242 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2243 struct ctdb_tcp_connection *tcp)
2247 if (array == NULL) {
2251 for (i=0;i<array->num;i++) {
2252 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2253 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2254 return &array->connections[i];
/*
 * ctdb_control_tcp_add: record the connection carried in indata (read as
 * a struct ctdb_tcp_connection) in the tickle array of the vnn that owns
 * its destination address.
 *
 *  - destination not a public address: logged at DEBUG_INFO;
 *  - vnn has no array yet: a ctdb_tcp_array is allocated on ctdb->nodes
 *    with a one-element connections array and the connection is stored;
 *  - connection already present (ctdb_tcp_find): logged at DEBUG_DEBUG;
 *  - otherwise the connections array is grown with talloc_realloc() and
 *    the connection is appended.
 *
 * When tcp_update_needed is true, vnn->tcp_update_needed is set on both
 * paths that add an entry.
 */
2263 called by a daemon to inform us of a TCP connection that one of its
2264 clients managing that should tickled with an ACK when IP takeover is
2267 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2269 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2270 struct ctdb_tcp_array *tcparray;
2271 struct ctdb_tcp_connection tcp;
2272 struct ctdb_vnn *vnn;
2274 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2276 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2277 ctdb_addr_to_str(&p->dst_addr)));
2283 tcparray = vnn->tcp_array;
2285 /* If this is the first tickle */
2286 if (tcparray == NULL) {
2287 tcparray = talloc_size(ctdb->nodes,
2288 offsetof(struct ctdb_tcp_array, connections) +
2289 sizeof(struct ctdb_tcp_connection) * 1);
2290 CTDB_NO_MEMORY(ctdb, tcparray);
2291 vnn->tcp_array = tcparray;
2294 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2295 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2297 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2298 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2301 if (tcp_update_needed) {
2302 vnn->tcp_update_needed = true;
2308 /* Do we already have this tickle ?*/
2309 tcp.src_addr = p->src_addr;
2310 tcp.dst_addr = p->dst_addr;
2311 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2312 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2313 ctdb_addr_to_str(&tcp.dst_addr),
2314 ntohs(tcp.dst_addr.ip.sin_port),
2319 /* A new tickle, we must add it to the array */
/* NOTE(review): the talloc_realloc() result is written straight back into
 * tcparray->connections, so a failed reallocation replaces the only
 * pointer to the existing entries with NULL while vnn->tcp_array still
 * refers to this array.  Confirm the CTDB_NO_MEMORY failure path leaves
 * the vnn in a usable state. */
2320 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2321 struct ctdb_tcp_connection,
2323 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2325 vnn->tcp_array = tcparray;
2326 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2327 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2330 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2331 ctdb_addr_to_str(&tcp.dst_addr),
2332 ntohs(tcp.dst_addr.ip.sin_port),
2335 if (tcp_update_needed) {
2336 vnn->tcp_update_needed = true;
/*
 * ctdb_remove_tcp_connection: drop conn from the tickle array of the vnn
 * found for conn->dst_addr.
 *
 * Nothing is removed (only a log message is produced) when the address
 * is not a public address, when the vnn has no tickle array, or when the
 * connection is not in the array.
 *
 * Removal copies the last array element over the matching slot and
 * decrements num, so the order of the remaining entries is not
 * preserved.  When num reaches 0 the whole array is freed and
 * vnn->tcp_array is reset to NULL.  vnn->tcp_update_needed is set after
 * a removal.
 */
2344 called by a daemon to inform us of a TCP connection that one of its
2345 clients managing that should tickled with an ACK when IP takeover is
2348 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2350 struct ctdb_tcp_connection *tcpp;
2351 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2354 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2355 ctdb_addr_to_str(&conn->dst_addr)));
2359 /* if the array is empty we cant remove it
2360 and we dont need to do anything
2362 if (vnn->tcp_array == NULL) {
2363 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2364 ctdb_addr_to_str(&conn->dst_addr),
2365 ntohs(conn->dst_addr.ip.sin_port)));
2370 /* See if we know this connection
2371 if we dont know this connection then we dont need to do anything
2373 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2375 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2376 ctdb_addr_to_str(&conn->dst_addr),
2377 ntohs(conn->dst_addr.ip.sin_port)));
2382 /* We need to remove this entry from the array.
2383 Instead of allocating a new array and copying data to it
2384 we cheat and just copy the last entry in the existing array
2385 to the entry that is to be removed and just shring the
2388 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2389 vnn->tcp_array->num--;
2391 /* If we deleted the last entry we also need to remove the entire array
2393 if (vnn->tcp_array->num == 0) {
2394 talloc_free(vnn->tcp_array);
2395 vnn->tcp_array = NULL;
2398 vnn->tcp_update_needed = true;
2400 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2401 ctdb_addr_to_str(&conn->src_addr),
2402 ntohs(conn->src_addr.ip.sin_port)));
/*
 * Control handler: interprets indata.dptr as a struct ctdb_tcp_connection
 * and forwards it to ctdb_remove_tcp_connection().
 */
2407 called by a daemon to inform us of a TCP connection that one of its
2408 clients used are no longer needed in the tickle database
2410 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2412 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2414 ctdb_remove_tcp_connection(ctdb, conn);
/*
 * Control handler for the "daemon restarted" notification.  The only
 * content shown is a placeholder comment: sending the tickle list to the
 * restarted node is marked as still to be done.  Neither ctdb nor vnn is
 * used in the lines shown.
 */
2421 called when a daemon restarts - send all tickes for all public addresses
2422 we are serving immediately to the new node.
2424 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2426 /*XXX here we should send all tickles we are serving to the new node */
/*
 * Drains client->tcp_list: each entry is unlinked from the list and its
 * connection is passed to ctdb_remove_tcp_connection() on client->ctdb,
 * until the list is empty.
 */
2432 called when a client structure goes away - hook to remove
2433 elements from the tcp_list in all daemons
2435 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2437 while (client->tcp_list) {
2438 struct ctdb_tcp_list *tcp = client->tcp_list;
2439 DLIST_REMOVE(client->tcp_list, tcp);
2440 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
/*
 * Walks every vnn in ctdb->vnn.
 *  - When ctdb_sys_have_ip() reports that the public address is not
 *    present on this host, only the interface assignment is dropped
 *    (ctdb_vnn_unassign_iface).
 *  - Otherwise the CTDB_EVENT_RELEASE_IP event script is run with the
 *    arguments "<interface> <address> <netmask bits>", clients using the
 *    address are killed with release_kill_clients(), and the interface
 *    assignment is dropped.
 */
2446 release all IPs on shutdown
2448 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2450 struct ctdb_vnn *vnn;
2452 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2453 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2454 ctdb_vnn_unassign_iface(ctdb, vnn);
2460 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2461 ctdb_vnn_iface_string(vnn),
2462 ctdb_addr_to_str(&vnn->public_address),
2463 vnn->public_netmask_bits);
2464 release_kill_clients(ctdb, &vnn->public_address);
2465 ctdb_vnn_unassign_iface(ctdb, vnn);
/*
 * Builds a struct ctdb_all_public_ips in outdata with one (pnn, addr)
 * entry per vnn in ctdb->vnn.
 *
 * The buffer is zero-allocated as a talloc child of outdata, sized for
 * the number of vnns counted in the first loop.  When c->flags has
 * CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE set, vnns for which
 * ctdb_vnn_available() is false are skipped while filling, and
 * outdata->dsize is recomputed from the number of entries actually
 * written (i), so it can be smaller than the allocation.
 */
2471 get list of public IPs
2473 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2474 struct ctdb_req_control *c, TDB_DATA *outdata)
2477 struct ctdb_all_public_ips *ips;
2478 struct ctdb_vnn *vnn;
2479 bool only_available = false;
2481 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2482 only_available = true;
2485 /* count how many public ip structures we have */
2487 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2491 len = offsetof(struct ctdb_all_public_ips, ips) +
2492 num*sizeof(struct ctdb_public_ip);
2493 ips = talloc_zero_size(outdata, len);
2494 CTDB_NO_MEMORY(ctdb, ips);
2497 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2498 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2501 ips->ips[i].pnn = vnn->pnn;
2502 ips->ips[i].addr = vnn->public_address;
2506 len = offsetof(struct ctdb_all_public_ips, ips) +
2507 i*sizeof(struct ctdb_public_ip);
2509 outdata->dsize = len;
2510 outdata->dptr = (uint8_t *)ips;
/*
 * IPv4-only variant of the public address listing: both the counting
 * loop and the fill loop test each vnn's address family against AF_INET,
 * and the result is a struct ctdb_all_public_ipsv4 of (pnn, sin) entries
 * zero-allocated as a talloc child of outdata.  c is not used in the
 * lines shown.
 */
2517 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2519 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2520 struct ctdb_req_control *c, TDB_DATA *outdata)
2523 struct ctdb_all_public_ipsv4 *ips;
2524 struct ctdb_vnn *vnn;
2526 /* count how many public ip structures we have */
2528 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2529 if (vnn->public_address.sa.sa_family != AF_INET) {
2535 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2536 num*sizeof(struct ctdb_public_ipv4);
2537 ips = talloc_zero_size(outdata, len);
2538 CTDB_NO_MEMORY(ctdb, ips);
2540 outdata->dsize = len;
2541 outdata->dptr = (uint8_t *)ips;
2545 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2546 if (vnn->public_address.sa.sa_family != AF_INET) {
2549 ips->ips[i].pnn = vnn->pnn;
2550 ips->ips[i].sin = vnn->public_address.ip;
/*
 * ctdb_control_get_public_ip_info: describe one public address and the
 * interfaces configured for it.
 *
 * indata.dptr is read as a ctdb_sock_addr.  The address is looked up
 * with find_public_ip_vnn(); ctdb->single_ip_vnn is used instead when it
 * exists and ctdb_same_ip() matches.  An address that is neither is
 * logged as "not a public address".
 *
 * The reply (struct ctdb_control_public_ip_info, zero-allocated as a
 * talloc child of outdata) carries the address, its pnn and one
 * ctdb_control_iface_info per entry of the NULL-terminated vnn->ifaces
 * list: name, link_state (from link_up) and references.  active_idx is
 * the index of the interface equal to vnn->iface, or 0xFFFFFFFF when
 * none matches.  An interface name unknown to ctdb_find_iface() is
 * logged at DEBUG_CRIT as an internal error.
 */
2557 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2558 struct ctdb_req_control *c,
2563 ctdb_sock_addr *addr;
2564 struct ctdb_control_public_ip_info *info;
2565 struct ctdb_vnn *vnn;
2567 addr = (ctdb_sock_addr *)indata.dptr;
2569 vnn = find_public_ip_vnn(ctdb, addr);
2571 /* if it is not a public ip it could be our 'single ip' */
2572 if (ctdb->single_ip_vnn) {
2573 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2574 vnn = ctdb->single_ip_vnn;
2579 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2580 "'%s'not a public address\n",
2581 ctdb_addr_to_str(addr)));
2585 /* count how many public ip structures we have */
2587 for (;vnn->ifaces[num];) {
2591 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2592 num*sizeof(struct ctdb_control_iface_info);
2593 info = talloc_zero_size(outdata, len);
2594 CTDB_NO_MEMORY(ctdb, info);
2596 info->ip.addr = vnn->public_address;
2597 info->ip.pnn = vnn->pnn;
2598 info->active_idx = 0xFFFFFFFF;
2600 for (i=0; vnn->ifaces[i]; i++) {
2601 struct ctdb_iface *cur;
2603 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2605 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2609 if (vnn->iface == cur) {
2610 info->active_idx = i;
/* NOTE(review): unbounded strcpy - confirm that cur->name is always
 * short enough for the name field of ctdb_control_iface_info. */
2612 strcpy(info->ifaces[i].name, cur->name);
2613 info->ifaces[i].link_state = cur->link_up;
2614 info->ifaces[i].references = cur->references;
2617 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2618 i*sizeof(struct ctdb_control_iface_info);
2620 outdata->dsize = len;
2621 outdata->dptr = (uint8_t *)info;
/*
 * ctdb_control_get_ifaces: return every interface in ctdb->ifaces as a
 * struct ctdb_control_get_ifaces, zero-allocated as a talloc child of
 * outdata.  Each entry carries the interface name, link_state (from
 * link_up) and references.  The reply length is recomputed from the
 * number of entries written (i) before being stored in outdata.
 */
2626 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2627 struct ctdb_req_control *c,
2631 struct ctdb_control_get_ifaces *ifaces;
2632 struct ctdb_iface *cur;
2634 /* count how many public ip structures we have */
2636 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2640 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2641 num*sizeof(struct ctdb_control_iface_info);
2642 ifaces = talloc_zero_size(outdata, len);
2643 CTDB_NO_MEMORY(ctdb, ifaces);
2646 for (cur=ctdb->ifaces;cur;cur=cur->next) {
/* NOTE(review): unbounded strcpy - confirm that cur->name is always
 * short enough for the name field of ctdb_control_iface_info. */
2647 strcpy(ifaces->ifaces[i].name, cur->name);
2648 ifaces->ifaces[i].link_state = cur->link_up;
2649 ifaces->ifaces[i].references = cur->references;
2653 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2654 i*sizeof(struct ctdb_control_iface_info);
2656 outdata->dsize = len;
2657 outdata->dptr = (uint8_t *)ifaces;
/*
 * ctdb_control_set_iface_link: update the link state of one interface
 * from a struct ctdb_control_iface_info passed in indata.
 *
 * The request is checked in this order: the name must have a NUL at
 * index CTDB_IFACE_SIZE, link_state must be a value accepted by the
 * switch, and references must be 0; each violation is logged at
 * DEBUG_ERR.  The interface is then looked up by name with
 * ctdb_find_iface().  If the requested state equals the current one
 * nothing changes; otherwise the transition is logged (DEBUG_ERR when the
 * link was previously up, DEBUG_NOTICE otherwise) and iface->link_up is
 * updated.
 */
2662 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2663 struct ctdb_req_control *c,
2666 struct ctdb_control_iface_info *info;
2667 struct ctdb_iface *iface;
2668 bool link_up = false;
2670 info = (struct ctdb_control_iface_info *)indata.dptr;
/* The log call below prints at most CTDB_IFACE_SIZE bytes of the name,
 * since it is not known to be terminated at this point. */
2672 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2673 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2674 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2675 len, len, info->name));
2679 switch (info->link_state) {
2687 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2688 (unsigned int)info->link_state));
2692 if (info->references != 0) {
2693 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2694 (unsigned int)info->references));
2698 iface = ctdb_find_iface(ctdb, info->name);
2699 if (iface == NULL) {
2703 if (link_up == iface->link_up) {
2707 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2708 ("iface[%s] has changed it's link status %s => %s\n",
2710 iface->link_up?"up":"down",
2711 link_up?"up":"down"));
2713 iface->link_up = link_up;
/*
 * Per-vnn state for killing TCP connections.  Members shown: the vnn the
 * state belongs to, the ctdb context, the fd event registered for the
 * capture socket, and a tree (trbt_tree_t) of the connections to kill,
 * which capture_tcp_handler() searches with keys from killtcp_key().
 */
2719 structure containing the listening socket and the list of tcp connections
2720 that the ctdb daemon is to kill
2722 struct ctdb_kill_tcp {
2723 struct ctdb_vnn *vnn;
2724 struct ctdb_context *ctdb;
2726 struct fd_event *fde;
2727 trbt_tree_t *connections;
/*
 * One connection queued for killing: its source and destination socket
 * addresses plus a back pointer to the owning ctdb_kill_tcp state.
 */
2732 a tcp connection that is to be killed
2734 struct ctdb_killtcp_con {
2735 ctdb_sock_addr src_addr;
2736 ctdb_sock_addr dst_addr;
2738 struct ctdb_kill_tcp *killtcp;
/*
 * killtcp_key: build the KILLTCP_KEYLEN (10) word lookup key for a
 * source/destination pair.
 *
 * The key is zeroed first.  For sockaddr_in addresses words 0-3 hold
 * dst address, src address, dst port, src port; the remaining words stay
 * zero.  For sockaddr_in6 addresses words 0-7 interleave the four 32-bit
 * address words of dst and src (highest index first) and words 8-9 hold
 * the two ports.  A family mismatch between src and dst, or an unknown
 * family, is logged at DEBUG_ERR.
 *
 * The key is built in a function-local static array, so its contents are
 * overwritten by the next call (presumably the returned pointer refers
 * to this buffer - callers must use the key before calling again).
 */
2741 /* this function is used to create a key to represent this socketpair
2742 in the killtcp tree.
2743 this key is used to insert and lookup matching socketpairs that are
2744 to be tickled and RST
2746 #define KILLTCP_KEYLEN 10
2747 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2749 static uint32_t key[KILLTCP_KEYLEN];
2751 bzero(key, sizeof(key));
2753 if (src->sa.sa_family != dst->sa.sa_family) {
2754 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2758 switch (src->sa.sa_family) {
2760 key[0] = dst->ip.sin_addr.s_addr;
2761 key[1] = src->ip.sin_addr.s_addr;
2762 key[2] = dst->ip.sin_port;
2763 key[3] = src->ip.sin_port;
2766 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2767 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2768 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2769 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2770 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2771 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2772 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2773 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2774 key[8] = dst->ip6.sin6_port;
2775 key[9] = src->ip6.sin6_port;
2778 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
/*
 * capture_tcp_handler: fd event callback for the capture socket of a
 * ctdb_kill_tcp (passed as private_data).
 *
 * Events without EVENT_FD_READ are ignored.  One packet is read with
 * ctdb_sys_read_tcp_packet(); a non-zero return is treated as "not a
 * packet we care about".  The (src, dst) pair of the packet is looked up
 * in killtcp->connections using killtcp_key(); packets for unknown
 * connections are ignored.  For a known connection a TCP segment is sent
 * with ctdb_sys_send_tcp() from con->dst_addr to con->src_addr using the
 * captured ack_seq/seq values and a final argument of 1 (presumably the
 * RST flag, matching the log message).
 */
2786 called when we get a read event on the raw socket
2788 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2789 uint16_t flags, void *private_data)
2791 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2792 struct ctdb_killtcp_con *con;
2793 ctdb_sock_addr src, dst;
2794 uint32_t ack_seq, seq;
2796 if (!(flags & EVENT_FD_READ)) {
2800 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2801 killtcp->private_data,
2803 &ack_seq, &seq) != 0) {
2804 /* probably a non-tcp ACK packet */
2808 /* check if we have this guy in our list of connections
2811 con = trbt_lookuparray32(killtcp->connections,
2812 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2814 /* no this was some other packet we can just ignore */
2818 /* This one has been tickled !
2819 now reset him and remove him from the list.
2821 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2822 ntohs(con->dst_addr.ip.sin_port),
2823 ctdb_addr_to_str(&con->src_addr),
2824 ntohs(con->src_addr.ip.sin_port)));
2826 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2831 /* when traversing the list of all tcp connections to send tickle acks to
2832 (so that we can capture the ack coming back and kill the connection
2834 this callback is called for each connection we are currently trying to kill
2836 static void tickle_connection_traverse(void *param, void *data)
2838 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2840 /* have tried too many times, just give up */
2841 if (con->count >= 5) {
2842 /* can't delete in traverse: reparent to delete_cons */
2843 talloc_steal(param, con);
2847 /* othervise, try tickling it again */
2850 (ctdb_sock_addr *)&con->dst_addr,
2851 (ctdb_sock_addr *)&con->src_addr,
2857 called every second until all sentenced connections have been reset
2859 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2860 struct timeval t, void *private_data)
2862 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2863 void *delete_cons = talloc_new(NULL);
2865 /* loop over all connections sending tickle ACKs */
2866 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2868 /* now we've finished traverse, it's safe to do deletion. */
2869 talloc_free(delete_cons);
2871 /* If there are no more connections to kill we can remove the
2872 entire killtcp structure
2874 if ( (killtcp->connections == NULL) ||
2875 (killtcp->connections->root == NULL) ) {
2876 talloc_free(killtcp);
2880 /* try tickling them again in a seconds time
2882 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2883 ctdb_tickle_sentenced_connections, killtcp);
2887 destroy the killtcp structure
2889 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2891 struct ctdb_vnn *tmpvnn;
2893 /* verify that this vnn is still active */
2894 for (tmpvnn = killtcp->ctdb->vnn; tmpvnn; tmpvnn = tmpvnn->next) {
2895 if (tmpvnn == killtcp->vnn) {
2900 if (tmpvnn == NULL) {
2904 if (killtcp->vnn->killtcp != killtcp) {
2908 killtcp->vnn->killtcp = NULL;
/* Insert callback for the killtcp tree.

   Nothing fancy here: unconditionally replace any existing connection
   structure with the new one by returning the new entry (parm).

   The old entry (data), if any, is deliberately not freed here.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	return replacement;
}
2927 add a tcp socket to the list of connections we want to RST
2929 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2933 ctdb_sock_addr src, dst;
2934 struct ctdb_kill_tcp *killtcp;
2935 struct ctdb_killtcp_con *con;
2936 struct ctdb_vnn *vnn;
2938 ctdb_canonicalize_ip(s, &src);
2939 ctdb_canonicalize_ip(d, &dst);
2941 vnn = find_public_ip_vnn(ctdb, &dst);
2943 vnn = find_public_ip_vnn(ctdb, &src);
2946 /* if it is not a public ip it could be our 'single ip' */
2947 if (ctdb->single_ip_vnn) {
2948 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2949 vnn = ctdb->single_ip_vnn;
2954 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2958 killtcp = vnn->killtcp;
2960 /* If this is the first connection to kill we must allocate
2963 if (killtcp == NULL) {
2964 killtcp = talloc_zero(vnn, struct ctdb_kill_tcp);
2965 CTDB_NO_MEMORY(ctdb, killtcp);
2968 killtcp->ctdb = ctdb;
2969 killtcp->capture_fd = -1;
2970 killtcp->connections = trbt_create(killtcp, 0);
2972 vnn->killtcp = killtcp;
2973 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2978 /* create a structure that describes this connection we want to
2979 RST and store it in killtcp->connections
2981 con = talloc(killtcp, struct ctdb_killtcp_con);
2982 CTDB_NO_MEMORY(ctdb, con);
2983 con->src_addr = src;
2984 con->dst_addr = dst;
2986 con->killtcp = killtcp;
2989 trbt_insertarray32_callback(killtcp->connections,
2990 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2991 add_killtcp_callback, con);
2994 If we dont have a socket to listen on yet we must create it
2996 if (killtcp->capture_fd == -1) {
2997 const char *iface = ctdb_vnn_iface_string(vnn);
2998 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2999 if (killtcp->capture_fd == -1) {
3000 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
3001 "socket on iface '%s' for killtcp (%s)\n",
3002 iface, strerror(errno)));
3008 if (killtcp->fde == NULL) {
3009 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
3011 capture_tcp_handler, killtcp);
3012 tevent_fd_set_auto_close(killtcp->fde);
3014 /* We also need to set up some events to tickle all these connections
3015 until they are all reset
3017 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
3018 ctdb_tickle_sentenced_connections, killtcp);
3021 /* tickle him once now */
3030 talloc_free(vnn->killtcp);
3031 vnn->killtcp = NULL;
3036 kill a TCP connection.
3038 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3040 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3042 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3046 called by a daemon to inform us of the entire list of TCP tickles for
3047 a particular public address.
3048 this control should only be sent by the node that is currently serving
3049 that public address.
3051 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3053 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3054 struct ctdb_tcp_array *tcparray;
3055 struct ctdb_vnn *vnn;
3057 /* We must at least have tickles.num or else we cant verify the size
3058 of the received data blob
3060 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3061 tickles.connections)) {
3062 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
3066 /* verify that the size of data matches what we expect */
3067 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3068 tickles.connections)
3069 + sizeof(struct ctdb_tcp_connection)
3070 * list->tickles.num) {
3071 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3075 vnn = find_public_ip_vnn(ctdb, &list->addr);
3077 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3078 ctdb_addr_to_str(&list->addr)));
3083 /* remove any old ticklelist we might have */
3084 talloc_free(vnn->tcp_array);
3085 vnn->tcp_array = NULL;
3087 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3088 CTDB_NO_MEMORY(ctdb, tcparray);
3090 tcparray->num = list->tickles.num;
3092 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3093 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3095 memcpy(tcparray->connections, &list->tickles.connections[0],
3096 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3098 /* We now have a new fresh tickle list array for this vnn */
3099 vnn->tcp_array = talloc_steal(vnn, tcparray);
3105 called to return the full list of tickles for the puclic address associated
3106 with the provided vnn
3108 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3110 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3111 struct ctdb_control_tcp_tickle_list *list;
3112 struct ctdb_tcp_array *tcparray;
3114 struct ctdb_vnn *vnn;
3116 vnn = find_public_ip_vnn(ctdb, addr);
3118 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3119 ctdb_addr_to_str(addr)));
3124 tcparray = vnn->tcp_array;
3126 num = tcparray->num;
3131 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3132 tickles.connections)
3133 + sizeof(struct ctdb_tcp_connection) * num;
3135 outdata->dptr = talloc_size(outdata, outdata->dsize);
3136 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3137 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3140 list->tickles.num = num;
3142 memcpy(&list->tickles.connections[0], tcparray->connections,
3143 sizeof(struct ctdb_tcp_connection) * num);
3151 set the list of all tcp tickles for a public address
3153 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3154 struct timeval timeout, uint32_t destnode,
3155 ctdb_sock_addr *addr,
3156 struct ctdb_tcp_array *tcparray)
3160 struct ctdb_control_tcp_tickle_list *list;
3163 num = tcparray->num;
3168 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3169 tickles.connections) +
3170 sizeof(struct ctdb_tcp_connection) * num;
3171 data.dptr = talloc_size(ctdb, data.dsize);
3172 CTDB_NO_MEMORY(ctdb, data.dptr);
3174 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3176 list->tickles.num = num;
3178 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3181 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3182 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3183 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3185 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3189 talloc_free(data.dptr);
3196 perform tickle updates if required
3198 static void ctdb_update_tcp_tickles(struct event_context *ev,
3199 struct timed_event *te,
3200 struct timeval t, void *private_data)
3202 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3204 struct ctdb_vnn *vnn;
3206 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3207 /* we only send out updates for public addresses that
3210 if (ctdb->pnn != vnn->pnn) {
3213 /* We only send out the updates if we need to */
3214 if (!vnn->tcp_update_needed) {
3217 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3219 CTDB_BROADCAST_CONNECTED,
3220 &vnn->public_address,
3223 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3224 ctdb_addr_to_str(&vnn->public_address)));
3228 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3229 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3230 ctdb_update_tcp_tickles, ctdb);
3235 start periodic update of tcp tickles
3237 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3239 ctdb->tickle_update_context = talloc_new(ctdb);
3241 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3242 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3243 ctdb_update_tcp_tickles, ctdb);
3249 struct control_gratious_arp {
3250 struct ctdb_context *ctdb;
3251 ctdb_sock_addr addr;
3257 send a control_gratuitous arp
3259 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3260 struct timeval t, void *private_data)
3263 struct control_gratious_arp *arp = talloc_get_type(private_data,
3264 struct control_gratious_arp);
3266 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3268 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3269 arp->iface, strerror(errno)));
3274 if (arp->count == CTDB_ARP_REPEAT) {
3279 event_add_timed(arp->ctdb->ev, arp,
3280 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3281 send_gratious_arp, arp);
3288 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3290 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3291 struct control_gratious_arp *arp;
3293 /* verify the size of indata */
3294 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3295 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3296 (unsigned)indata.dsize,
3297 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3301 ( offsetof(struct ctdb_control_gratious_arp, iface)
3302 + gratious_arp->len ) ){
3304 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3305 "but should be %u bytes\n",
3306 (unsigned)indata.dsize,
3307 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3312 arp = talloc(ctdb, struct control_gratious_arp);
3313 CTDB_NO_MEMORY(ctdb, arp);
3316 arp->addr = gratious_arp->addr;
3317 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3318 CTDB_NO_MEMORY(ctdb, arp->iface);
3321 event_add_timed(arp->ctdb->ev, arp,
3322 timeval_zero(), send_gratious_arp, arp);
3327 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3329 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3332 /* verify the size of indata */
3333 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3334 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3338 ( offsetof(struct ctdb_control_ip_iface, iface)
3341 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3342 "but should be %u bytes\n",
3343 (unsigned)indata.dsize,
3344 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3348 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
3351 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
  Called when the releaseip event finishes for del_public_address.
  private_data is the talloc context handed to the event script call;
  freeing it releases everything parented on it.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
				void *private_data)
{
	void *mem_ctx = private_data;

	talloc_free(mem_ctx);
}
3367 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3369 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3370 struct ctdb_vnn *vnn;
3373 /* verify the size of indata */
3374 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3375 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3379 ( offsetof(struct ctdb_control_ip_iface, iface)
3382 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3383 "but should be %u bytes\n",
3384 (unsigned)indata.dsize,
3385 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3389 /* walk over all public addresses until we find a match */
3390 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3391 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3392 TALLOC_CTX *mem_ctx;
3394 DLIST_REMOVE(ctdb->vnn, vnn);
3395 if (vnn->pnn != ctdb->pnn) {
3396 if (vnn->iface != NULL) {
3397 ctdb_vnn_unassign_iface(ctdb, vnn);
3404 mem_ctx = talloc_new(ctdb);
3405 talloc_steal(mem_ctx, vnn);
3406 ret = ctdb_event_script_callback(ctdb,
3407 mem_ctx, delete_ip_callback, mem_ctx,
3409 CTDB_EVENT_RELEASE_IP,
3411 ctdb_vnn_iface_string(vnn),
3412 ctdb_addr_to_str(&vnn->public_address),
3413 vnn->public_netmask_bits);
3414 if (vnn->iface != NULL) {
3415 ctdb_vnn_unassign_iface(ctdb, vnn);
3427 /* This function is called from the recovery daemon to verify that a remote
3428 node has the expected ip allocation.
3429 This is verified against ctdb->ip_tree
3431 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3433 struct ctdb_public_ip_list *tmp_ip;
3436 if (ctdb->ip_tree == NULL) {
3437 /* dont know the expected allocation yet, assume remote node
3446 for (i=0; i<ips->num; i++) {
3447 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3448 if (tmp_ip == NULL) {
3449 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
3453 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3457 if (tmp_ip->pnn != ips->ips[i].pnn) {
3458 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
3466 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3468 struct ctdb_public_ip_list *tmp_ip;
3470 if (ctdb->ip_tree == NULL) {
3471 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3475 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3476 if (tmp_ip == NULL) {
3477 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3481 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3482 tmp_ip->pnn = ip->pnn;