4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
6 Copyright (C) Martin Schwenke 2011
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "lib/tevent/tevent.h"
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "system/network.h"
26 #include "system/filesys.h"
27 #include "system/wait.h"
28 #include "../include/ctdb_private.h"
29 #include "../common/rb_tree.h"
/* Builds a timeval from the takeover_timeout tunable via timeval_current_ofs().
   The expansion references a variable named `ctdb`, so one must be in scope
   wherever the macro is used. */
32 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument to timeval_current_ofs() when ctdb_control_send_arp re-arms
   its timer (presumably seconds - confirm against timeval_current_ofs). */
34 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp compares arp->count against this value. */
35 #define CTDB_ARP_REPEAT 3
38 struct ctdb_iface *prev, *next;
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
47 return vnn->iface->name;
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
57 /* Verify that we don't have an entry for this interface yet */
58 for (i=ctdb->ifaces;i;i=i->next) {
59 if (strcmp(i->name, iface) == 0) {
64 /* create a new structure for this interface */
65 i = talloc_zero(ctdb, struct ctdb_iface);
66 CTDB_NO_MEMORY_FATAL(ctdb, i);
67 i->name = talloc_strdup(i, iface);
68 CTDB_NO_MEMORY(ctdb, i->name);
71 DLIST_ADD(ctdb->ifaces, i);
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
81 /* Look for an existing entry with this interface name */
82 for (i=ctdb->ifaces;i;i=i->next) {
83 if (strcmp(i->name, iface) == 0) {
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
95 struct ctdb_iface *cur = NULL;
96 struct ctdb_iface *best = NULL;
98 for (i=0; vnn->ifaces[i]; i++) {
100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
114 if (cur->references < best->references) {
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124 struct ctdb_vnn *vnn)
126 struct ctdb_iface *best = NULL;
129 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130 "still assigned to iface '%s'\n",
131 ctdb_addr_to_str(&vnn->public_address),
132 ctdb_vnn_iface_string(vnn)));
136 best = ctdb_vnn_best_iface(ctdb, vnn);
138 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139 "cannot assign to iface any iface\n",
140 ctdb_addr_to_str(&vnn->public_address)));
146 vnn->pnn = ctdb->pnn;
148 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149 "now assigned to iface '%s' refs[%d]\n",
150 ctdb_addr_to_str(&vnn->public_address),
151 ctdb_vnn_iface_string(vnn),
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157 struct ctdb_vnn *vnn)
159 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160 "now unassigned (old iface '%s' refs[%d])\n",
161 ctdb_addr_to_str(&vnn->public_address),
162 ctdb_vnn_iface_string(vnn),
163 vnn->iface?vnn->iface->references:0));
165 vnn->iface->references--;
168 if (vnn->pnn == ctdb->pnn) {
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174 struct ctdb_vnn *vnn)
178 if (vnn->iface && vnn->iface->link_up) {
182 for (i=0; vnn->ifaces[i]; i++) {
183 struct ctdb_iface *cur;
185 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
198 struct ctdb_takeover_arp {
199 struct ctdb_context *ctdb;
202 struct ctdb_tcp_array *tcparray;
203 struct ctdb_vnn *vnn;
208 lists of tcp endpoints
210 struct ctdb_tcp_list {
211 struct ctdb_tcp_list *prev, *next;
212 struct ctdb_tcp_connection connection;
216 list of clients to kill on IP release
218 struct ctdb_client_ip {
219 struct ctdb_client_ip *prev, *next;
220 struct ctdb_context *ctdb;
227 send a gratuitous arp
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
230 struct timeval t, void *private_data)
232 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
233 struct ctdb_takeover_arp);
235 struct ctdb_tcp_array *tcparray;
236 const char *iface = ctdb_vnn_iface_string(arp->vnn);
238 ret = ctdb_sys_send_arp(&arp->addr, iface);
240 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241 iface, strerror(errno)));
244 tcparray = arp->tcparray;
246 for (i=0;i<tcparray->num;i++) {
247 struct ctdb_tcp_connection *tcon;
249 tcon = &tcparray->connections[i];
250 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
252 ctdb_addr_to_str(&tcon->src_addr),
253 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254 ret = ctdb_sys_send_tcp(
259 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260 ctdb_addr_to_str(&tcon->src_addr)));
267 if (arp->count == CTDB_ARP_REPEAT) {
272 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
273 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
274 ctdb_control_send_arp, arp);
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278 struct ctdb_vnn *vnn)
280 struct ctdb_takeover_arp *arp;
281 struct ctdb_tcp_array *tcparray;
283 if (!vnn->takeover_ctx) {
284 vnn->takeover_ctx = talloc_new(vnn);
285 if (!vnn->takeover_ctx) {
290 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
296 arp->addr = vnn->public_address;
299 tcparray = vnn->tcp_array;
301 /* add all of the known tcp connections for this IP to the
302 list of tcp connections to send tickle acks for */
303 arp->tcparray = talloc_steal(arp, tcparray);
305 vnn->tcp_array = NULL;
306 vnn->tcp_update_needed = true;
309 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310 timeval_zero(), ctdb_control_send_arp, arp);
315 struct takeover_callback_state {
316 struct ctdb_req_control *c;
317 ctdb_sock_addr *addr;
318 struct ctdb_vnn *vnn;
321 struct ctdb_do_takeip_state {
322 struct ctdb_req_control *c;
323 struct ctdb_vnn *vnn;
327 called when takeip event finishes
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
332 struct ctdb_do_takeip_state *state =
333 talloc_get_type(private_data, struct ctdb_do_takeip_state);
338 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
340 if (status == -ETIME) {
343 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
344 ctdb_addr_to_str(&state->vnn->public_address),
345 ctdb_vnn_iface_string(state->vnn)));
346 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 node->flags |= NODE_FLAGS_UNHEALTHY;
353 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
355 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
360 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
361 data.dsize = strlen((char *)data.dptr) + 1;
362 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
364 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
367 /* the control succeeded */
368 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
374 take over an ip address
376 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
377 struct ctdb_req_control *c,
378 struct ctdb_vnn *vnn)
381 struct ctdb_do_takeip_state *state;
383 ret = ctdb_vnn_assign_iface(ctdb, vnn);
385 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
386 "assin a usable interface\n",
387 ctdb_addr_to_str(&vnn->public_address),
388 vnn->public_netmask_bits));
392 state = talloc(vnn, struct ctdb_do_takeip_state);
393 CTDB_NO_MEMORY(ctdb, state);
395 state->c = talloc_steal(ctdb, c);
398 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
399 ctdb_addr_to_str(&vnn->public_address),
400 vnn->public_netmask_bits,
401 ctdb_vnn_iface_string(vnn)));
403 ret = ctdb_event_script_callback(ctdb,
405 ctdb_do_takeip_callback,
410 ctdb_vnn_iface_string(vnn),
411 ctdb_addr_to_str(&vnn->public_address),
412 vnn->public_netmask_bits);
415 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
416 ctdb_addr_to_str(&vnn->public_address),
417 ctdb_vnn_iface_string(vnn)));
425 struct ctdb_do_updateip_state {
426 struct ctdb_req_control *c;
427 struct ctdb_iface *old;
428 struct ctdb_vnn *vnn;
432 called when updateip event finishes
434 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
437 struct ctdb_do_updateip_state *state =
438 talloc_get_type(private_data, struct ctdb_do_updateip_state);
442 if (status == -ETIME) {
445 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
446 ctdb_addr_to_str(&state->vnn->public_address),
448 ctdb_vnn_iface_string(state->vnn)));
451 * All we can do is reset the old interface
452 * and let the next run fix it
454 ctdb_vnn_unassign_iface(ctdb, state->vnn);
455 state->vnn->iface = state->old;
456 state->vnn->iface->references++;
458 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
463 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
465 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
470 /* the control succeeded */
471 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
477 update (move) an ip address
479 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
480 struct ctdb_req_control *c,
481 struct ctdb_vnn *vnn)
484 struct ctdb_do_updateip_state *state;
485 struct ctdb_iface *old = vnn->iface;
486 const char *new_name;
488 ctdb_vnn_unassign_iface(ctdb, vnn);
489 ret = ctdb_vnn_assign_iface(ctdb, vnn);
491 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
492 "assin a usable interface (old iface '%s')\n",
493 ctdb_addr_to_str(&vnn->public_address),
494 vnn->public_netmask_bits,
499 new_name = ctdb_vnn_iface_string(vnn);
500 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
501 /* A benign update from one interface onto itself.
502 * no need to run the eventscripts in this case, just return
505 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
509 state = talloc(vnn, struct ctdb_do_updateip_state);
510 CTDB_NO_MEMORY(ctdb, state);
512 state->c = talloc_steal(ctdb, c);
516 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
517 "interface %s to %s\n",
518 ctdb_addr_to_str(&vnn->public_address),
519 vnn->public_netmask_bits,
523 ret = ctdb_event_script_callback(ctdb,
525 ctdb_do_updateip_callback,
528 CTDB_EVENT_UPDATE_IP,
532 ctdb_addr_to_str(&vnn->public_address),
533 vnn->public_netmask_bits);
535 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
536 ctdb_addr_to_str(&vnn->public_address),
537 old->name, new_name));
546 Find the vnn of the node that has a public ip address
547 returns -1 if the address is not known as a public address
549 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
551 struct ctdb_vnn *vnn;
553 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
554 if (ctdb_same_ip(&vnn->public_address, addr)) {
563 take over an ip address
565 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
566 struct ctdb_req_control *c,
571 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
572 struct ctdb_vnn *vnn;
573 bool have_ip = false;
574 bool do_updateip = false;
575 bool do_takeip = false;
576 struct ctdb_iface *best_iface = NULL;
578 if (pip->pnn != ctdb->pnn) {
579 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
580 "with pnn %d, but we're node %d\n",
581 ctdb_addr_to_str(&pip->addr),
582 pip->pnn, ctdb->pnn));
586 /* update our vnn list */
587 vnn = find_public_ip_vnn(ctdb, &pip->addr);
589 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
590 ctdb_addr_to_str(&pip->addr)));
594 have_ip = ctdb_sys_have_ip(&pip->addr);
595 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
596 if (best_iface == NULL) {
597 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
598 "a usable interface (old %s, have_ip %d)\n",
599 ctdb_addr_to_str(&vnn->public_address),
600 vnn->public_netmask_bits,
601 ctdb_vnn_iface_string(vnn),
606 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
607 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
611 if (vnn->iface == NULL && have_ip) {
612 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
613 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
614 ctdb_addr_to_str(&vnn->public_address)));
618 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
619 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
620 "and we have it on iface[%s], but it was assigned to node %d"
621 "and we are node %d, banning ourself\n",
622 ctdb_addr_to_str(&vnn->public_address),
623 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
628 if (vnn->pnn == -1 && have_ip) {
629 vnn->pnn = ctdb->pnn;
630 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
631 "and we already have it on iface[%s], update local daemon\n",
632 ctdb_addr_to_str(&vnn->public_address),
633 ctdb_vnn_iface_string(vnn)));
638 if (vnn->iface->link_up) {
639 /* only move when the rebalance gains something */
640 if (vnn->iface->references > (best_iface->references + 1)) {
643 } else if (vnn->iface != best_iface) {
650 ctdb_vnn_unassign_iface(ctdb, vnn);
657 ret = ctdb_do_takeip(ctdb, c, vnn);
661 } else if (do_updateip) {
662 ret = ctdb_do_updateip(ctdb, c, vnn);
668 * The interface is up and the kernel knows the ip
671 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
672 ctdb_addr_to_str(&pip->addr),
673 vnn->public_netmask_bits,
674 ctdb_vnn_iface_string(vnn)));
678 /* tell ctdb_control.c that we will be replying asynchronously */
685 takeover an ip address old v4 style
687 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
688 struct ctdb_req_control *c,
694 data.dsize = sizeof(struct ctdb_public_ip);
695 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
696 CTDB_NO_MEMORY(ctdb, data.dptr);
698 memcpy(data.dptr, indata.dptr, indata.dsize);
699 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
703 kill any clients that are registered with an IP that is being released
705 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
707 struct ctdb_client_ip *ip;
709 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
710 ctdb_addr_to_str(addr)));
712 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
713 ctdb_sock_addr tmp_addr;
716 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
718 ctdb_addr_to_str(&ip->addr)));
720 if (ctdb_same_ip(&tmp_addr, addr)) {
721 struct ctdb_client *client = ctdb_reqid_find(ctdb,
724 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
726 ctdb_addr_to_str(&ip->addr),
729 if (client->pid != 0) {
730 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
731 (unsigned)client->pid,
732 ctdb_addr_to_str(addr),
734 kill(client->pid, SIGKILL);
741 called when releaseip event finishes
743 static void release_ip_callback(struct ctdb_context *ctdb, int status,
746 struct takeover_callback_state *state =
747 talloc_get_type(private_data, struct takeover_callback_state);
750 if (status == -ETIME) {
754 /* send a message to all clients of this node telling them
755 that the cluster has been reconfigured and they should
756 release any sockets on this IP */
757 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
758 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
759 data.dsize = strlen((char *)data.dptr)+1;
761 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
763 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
765 /* kill clients that have registered with this IP */
766 release_kill_clients(ctdb, state->addr);
768 ctdb_vnn_unassign_iface(ctdb, state->vnn);
770 /* the control succeeded */
771 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
776 release an ip address
778 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
779 struct ctdb_req_control *c,
784 struct takeover_callback_state *state;
785 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
786 struct ctdb_vnn *vnn;
788 /* update our vnn list */
789 vnn = find_public_ip_vnn(ctdb, &pip->addr);
791 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
792 ctdb_addr_to_str(&pip->addr)));
797 /* stop any previous arps */
798 talloc_free(vnn->takeover_ctx);
799 vnn->takeover_ctx = NULL;
801 if (!ctdb_sys_have_ip(&pip->addr)) {
802 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
803 ctdb_addr_to_str(&pip->addr),
804 vnn->public_netmask_bits,
805 ctdb_vnn_iface_string(vnn)));
806 ctdb_vnn_unassign_iface(ctdb, vnn);
810 if (vnn->iface == NULL) {
811 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
812 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
813 ctdb_addr_to_str(&vnn->public_address)));
817 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits,
820 ctdb_vnn_iface_string(vnn),
823 state = talloc(ctdb, struct takeover_callback_state);
824 CTDB_NO_MEMORY(ctdb, state);
826 state->c = talloc_steal(state, c);
827 state->addr = talloc(state, ctdb_sock_addr);
828 CTDB_NO_MEMORY(ctdb, state->addr);
829 *state->addr = pip->addr;
832 ret = ctdb_event_script_callback(ctdb,
833 state, release_ip_callback, state,
835 CTDB_EVENT_RELEASE_IP,
837 ctdb_vnn_iface_string(vnn),
838 ctdb_addr_to_str(&pip->addr),
839 vnn->public_netmask_bits);
841 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
842 ctdb_addr_to_str(&pip->addr),
843 ctdb_vnn_iface_string(vnn)));
848 /* tell the control that we will be replying asynchronously */
854 release an ip address old v4 style
856 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
857 struct ctdb_req_control *c,
863 data.dsize = sizeof(struct ctdb_public_ip);
864 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
865 CTDB_NO_MEMORY(ctdb, data.dptr);
867 memcpy(data.dptr, indata.dptr, indata.dsize);
868 return ctdb_control_release_ip(ctdb, c, data, async_reply);
872 static int ctdb_add_public_address(struct ctdb_context *ctdb,
873 ctdb_sock_addr *addr,
874 unsigned mask, const char *ifaces)
876 struct ctdb_vnn *vnn;
883 tmp = talloc_strdup(vnn, ifaces);
884 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
885 if (!ctdb_sys_check_iface_exists(iface)) {
886 DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
893 /* Verify that we don't have an entry for this ip yet */
894 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
895 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
896 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
897 ctdb_addr_to_str(addr)));
902 /* create a new vnn structure for this ip address */
903 vnn = talloc_zero(ctdb, struct ctdb_vnn);
904 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
905 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
906 tmp = talloc_strdup(vnn, ifaces);
907 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
908 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
909 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
910 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
911 vnn->ifaces[num] = talloc_strdup(vnn, iface);
912 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
916 vnn->ifaces[num] = NULL;
917 vnn->public_address = *addr;
918 vnn->public_netmask_bits = mask;
920 if (ctdb_sys_have_ip(addr)) {
921 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
922 vnn->pnn = ctdb->pnn;
925 for (i=0; vnn->ifaces[i]; i++) {
926 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
928 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
929 "for public_address[%s]\n",
930 vnn->ifaces[i], ctdb_addr_to_str(addr)));
935 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
939 DLIST_ADD(ctdb->vnn, vnn);
945 setup the event script directory
947 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
949 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
950 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
954 static void ctdb_check_interfaces_event(struct event_context *ev, struct timed_event *te,
955 struct timeval t, void *private_data)
957 struct ctdb_context *ctdb = talloc_get_type(private_data,
958 struct ctdb_context);
959 struct ctdb_vnn *vnn;
961 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
964 for (i=0; vnn->ifaces[i] != NULL; i++) {
965 if (!ctdb_sys_check_iface_exists(vnn->ifaces[i])) {
966 DEBUG(DEBUG_CRIT,("Interface %s does not exist but is used by public ip %s\n",
968 ctdb_addr_to_str(&vnn->public_address)));
973 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
974 timeval_current_ofs(30, 0),
975 ctdb_check_interfaces_event, ctdb);
979 static int ctdb_start_monitoring_interfaces(struct ctdb_context *ctdb)
981 if (ctdb->check_public_ifaces_ctx != NULL) {
982 talloc_free(ctdb->check_public_ifaces_ctx);
983 ctdb->check_public_ifaces_ctx = NULL;
986 ctdb->check_public_ifaces_ctx = talloc_new(ctdb);
987 if (ctdb->check_public_ifaces_ctx == NULL) {
988 ctdb_fatal(ctdb, "failed to allocate context for checking interfaces");
991 event_add_timed(ctdb->ev, ctdb->check_public_ifaces_ctx,
992 timeval_current_ofs(30, 0),
993 ctdb_check_interfaces_event, ctdb);
1000 setup the public address lists from a file
1002 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
1008 lines = file_lines_load(alist, &nlines, ctdb);
1009 if (lines == NULL) {
1010 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
1013 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1017 for (i=0;i<nlines;i++) {
1019 ctdb_sock_addr addr;
1020 const char *addrstr;
1025 while ((*line == ' ') || (*line == '\t')) {
1031 if (strcmp(line, "") == 0) {
1034 tok = strtok(line, " \t");
1036 tok = strtok(NULL, " \t");
1038 if (NULL == ctdb->default_public_interface) {
1039 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1044 ifaces = ctdb->default_public_interface;
1049 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1050 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1054 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
1055 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1062 ctdb_start_monitoring_interfaces(ctdb);
1068 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1072 struct ctdb_vnn *svnn;
1073 struct ctdb_iface *cur = NULL;
1077 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1078 CTDB_NO_MEMORY(ctdb, svnn);
1080 svnn->ifaces = talloc_array(svnn, const char *, 2);
1081 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1082 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1083 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1084 svnn->ifaces[1] = NULL;
1086 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1092 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1094 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1095 "for single_ip[%s]\n",
1097 ctdb_addr_to_str(&svnn->public_address)));
1102 /* assume the single public ip interface is initially "good" */
1103 cur = ctdb_find_iface(ctdb, iface);
1105 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1108 cur->link_up = true;
1110 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1116 ctdb->single_ip_vnn = svnn;
1120 /* Given a physical node, return the number of
1121 public addresses that are currently assigned to this node.
1123 static int node_ip_coverage(struct ctdb_context *ctdb,
1125 struct ctdb_public_ip_list *ips)
1129 for (;ips;ips=ips->next) {
1130 if (ips->pnn == pnn) {
1138 /* Check if this is a public ip known to the node, i.e. can that
1139 node take over this ip?
1141 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1142 struct ctdb_public_ip_list *ip)
1144 struct ctdb_all_public_ips *public_ips;
1147 public_ips = ctdb->nodes[pnn]->available_public_ips;
1149 if (public_ips == NULL) {
1153 for (i=0;i<public_ips->num;i++) {
1154 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1155 /* yes, this node can serve this public ip */
1164 /* search the node list for a node to take over this ip.
1165 pick the node that is currently serving the fewest ips
1166 so that the ips get spread out evenly.
1168 static int find_takeover_node(struct ctdb_context *ctdb,
1169 struct ctdb_node_map *nodemap, uint32_t mask,
1170 struct ctdb_public_ip_list *ip,
1171 struct ctdb_public_ip_list *all_ips)
1173 int pnn, min=0, num;
1177 for (i=0;i<nodemap->num;i++) {
1178 if (nodemap->nodes[i].flags & mask) {
1179 /* This node is not healthy and cannot be used to serve
1185 /* verify that this node can serve this ip */
1186 if (can_node_serve_ip(ctdb, i, ip)) {
1187 /* no, it couldn't, so skip to the next node */
1191 num = node_ip_coverage(ctdb, i, all_ips);
1192 /* was this the first node we checked ? */
1204 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1205 ctdb_addr_to_str(&ip->addr)));
1215 static uint32_t *ip_key(ctdb_sock_addr *ip)
1217 static uint32_t key[IP_KEYLEN];
1219 bzero(key, sizeof(key));
1221 switch (ip->sa.sa_family) {
1223 key[3] = htonl(ip->ip.sin_addr.s_addr);
1226 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1227 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1228 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1229 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1232 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1239 static void *add_ip_callback(void *parm, void *data)
1241 struct ctdb_public_ip_list *this_ip = parm;
1242 struct ctdb_public_ip_list *prev_ip = data;
1244 if (prev_ip == NULL) {
1247 if (this_ip->pnn == -1) {
1248 this_ip->pnn = prev_ip->pnn;
1254 void getips_count_callback(void *param, void *data)
1256 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1257 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1259 new_ip->next = *ip_list;
1263 static struct ctdb_public_ip_list *
1264 create_merged_ip_list(struct ctdb_context *ctdb)
1267 struct ctdb_public_ip_list *ip_list;
1268 struct ctdb_all_public_ips *public_ips;
1270 if (ctdb->ip_tree != NULL) {
1271 talloc_free(ctdb->ip_tree);
1272 ctdb->ip_tree = NULL;
1274 ctdb->ip_tree = trbt_create(ctdb, 0);
1276 for (i=0;i<ctdb->num_nodes;i++) {
1277 public_ips = ctdb->nodes[i]->known_public_ips;
1279 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1283 /* there were no public ips for this node */
1284 if (public_ips == NULL) {
1288 for (j=0;j<public_ips->num;j++) {
1289 struct ctdb_public_ip_list *tmp_ip;
1291 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1292 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1293 tmp_ip->pnn = public_ips->ips[j].pnn;
1294 tmp_ip->addr = public_ips->ips[j].addr;
1295 tmp_ip->next = NULL;
1297 trbt_insertarray32_callback(ctdb->ip_tree,
1298 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1305 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1311 * This is the length of the longest common prefix between the IPs.
1312 * It is calculated by XOR-ing the 2 IPs together and counting the
1313 * number of leading zeroes. The implementation means that all
1314 * addresses end up being 128 bits long.
1315 * Not static, so we can easily link it into a unit test.
1317 * FIXME? Should we consider IPv4 and IPv6 separately given that the
1318 * 12 bytes of 0 prefix padding will hurt the algorithm if there are
1319 * lots of nodes and IP addresses?
1321 uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
1323 uint32_t ip1_k[IP_KEYLEN];
1328 uint32_t distance = 0;
1330 memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
1332 for (i=0; i<IP_KEYLEN; i++) {
1333 x = ip1_k[i] ^ t[i];
1337 /* Count number of leading zeroes.
1338 * FIXME? This could be optimised...
1340 while ((x & (1 << 31)) == 0) {
1350 /* Calculate the IP distance for the given IP relative to IPs on the
1351 given node. The ips argument is generally the all_ips variable
1352 used in the main part of the algorithm.
1353 * Not static, so we can easily link it into a unit test.
1355 uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
1356 struct ctdb_public_ip_list *ips,
1359 struct ctdb_public_ip_list *t;
1364 for (t=ips; t != NULL; t=t->next) {
1365 if (t->pnn != pnn) {
1369 /* Optimisation: We never calculate the distance
1370 * between an address and itself. This allows us to
1371 * calculate the effect of removing an address from a
1372 * node by simply calculating the distance between
1373 * that address and all of the existing addresses.
1374 * Moreover, we assume that we're only ever dealing
1375 * with addresses from all_ips so we can identify an
1376 * address via a pointer rather than doing a more
1377 * expensive address comparison. */
1378 if (&(t->addr) == ip) {
1382 d = ip_distance(ip, &(t->addr));
1383 sum += d * d; /* Cheaper than pulling in math.h :-) */
1389 /* Return the LCP2 imbalance metric for addresses currently assigned
1391 * Not static, so we can easily link it into a unit test.
1393 uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
1395 struct ctdb_public_ip_list *t;
1397 uint32_t imbalance = 0;
1399 for (t=all_ips; t!=NULL; t=t->next) {
1400 if (t->pnn != pnn) {
1403 /* Pass the rest of the IPs rather than the whole
1406 imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
1412 /* Allocate any unassigned IPs just by looping through the IPs and
1413 * finding the best node for each.
1414 * Not static, so we can easily link it into a unit test.
1416 void basic_allocate_unassigned(struct ctdb_context *ctdb,
1417 struct ctdb_node_map *nodemap,
1419 struct ctdb_public_ip_list *all_ips)
1421 struct ctdb_public_ip_list *tmp_ip;
1423 /* loop over all ip's and find a physical node to cover for
1426 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1427 if (tmp_ip->pnn == -1) {
1428 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1429 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1430 ctdb_addr_to_str(&tmp_ip->addr)));
1436 /* Basic non-deterministic rebalancing algorithm.
1437 * Not static, so we can easily link it into a unit test.
1439 bool basic_failback(struct ctdb_context *ctdb,
1440 struct ctdb_node_map *nodemap,
1442 struct ctdb_public_ip_list *all_ips,
1447 int maxnode, maxnum=0, minnode, minnum=0, num;
1448 struct ctdb_public_ip_list *tmp_ip;
1450 /* for each ip address, loop over all nodes that can serve
1451 this ip and make sure that the difference between the node
1452 serving the most and the node serving the least ip's are
1455 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1456 if (tmp_ip->pnn == -1) {
1460 /* Get the highest and lowest number of ips served by any
1461 valid node which can serve this ip.
1465 for (i=0;i<nodemap->num;i++) {
1466 if (nodemap->nodes[i].flags & mask) {
1470 /* only check nodes that can actually serve this ip */
1471 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1472 /* no, it couldn't, so skip to the next node */
1476 num = node_ip_coverage(ctdb, i, all_ips);
1477 if (maxnode == -1) {
1486 if (minnode == -1) {
1496 if (maxnode == -1) {
1497 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1498 ctdb_addr_to_str(&tmp_ip->addr)));
1503 /* If we want deterministic IPs then don't try to reallocate
1504 them to spread out the load.
1506 if (1 == ctdb->tunable.deterministic_public_ips) {
1510 /* if the spread between the smallest and largest coverage by
1511 a node is >=2 we steal one of the ips from the node with
1512 most coverage to even things out a bit.
1513 try to do this a limited number of times since we dont
1514 want to spend too much time balancing the ip coverage.
1516 if ( (maxnum > minnum+1)
1517 && (*retries < (num_ips + 5)) ){
1518 struct ctdb_public_ip_list *tmp;
1520 /* mark one of maxnode's vnn's as unassigned and try
1523 for (tmp=all_ips;tmp;tmp=tmp->next) {
1524 if (tmp->pnn == maxnode) {
// lcp2_init: allocate and fill the two per-node arrays used by the LCP2
// allocator.
//   tmp_ctx          talloc parent for both arrays, also passed to
//                    CTDB_NO_MEMORY_FATAL
//   nodemap          supplies the node count and per-node flags
//   all_ips          merged public address list
//   lcp2_imbalances  out: array of nodemap->num values, one per node, each
//                    set to lcp2_imbalance(all_ips, i)
//   newly_healthy    out: array of nodemap->num flags, see below
// newly_healthy[i] starts as "node i has none of the mask flags set" and is
// then cleared for every node that already owns at least one address.
// NOTE(review): tmp_ctx is declared as struct ctdb_context * here, while
// ctdb_takeover_run_core passes its TALLOC_CTX; the `mask` parameter
// declaration is not visible in this listing. Confirm both against the
// upstream file.
1536 /* Do necessary LCP2 initialisation. Bury it in a function here so
1537 * that we can unit test it.
1538 * Not static, so we can easily link it into a unit test.
1540 void lcp2_init(struct ctdb_context * tmp_ctx,
1541 struct ctdb_node_map * nodemap,
1543 struct ctdb_public_ip_list *all_ips,
1544 uint32_t **lcp2_imbalances,
1545 bool **newly_healthy)
1548 struct ctdb_public_ip_list *tmp_ip;
// both arrays have one slot per node and are children of tmp_ctx
1550 *newly_healthy = talloc_array(tmp_ctx, bool, nodemap->num);
1551 CTDB_NO_MEMORY_FATAL(tmp_ctx, *newly_healthy);
1552 *lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1553 CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);
1555 for (i=0;i<nodemap->num;i++) {
1556 (*lcp2_imbalances)[i] = lcp2_imbalance(all_ips, i);
1557 /* First step: is the node "healthy"? */
1558 (*newly_healthy)[i] = ! (bool)(nodemap->nodes[i].flags & mask);
1561 /* 2nd step: if a ndoe has IPs assigned then it must have been
1562 * healthy before, so we remove it from consideration... */
1563 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1564 if (tmp_ip->pnn != -1) {
// pnn is used directly as an index into the per-node array
1565 (*newly_healthy)[tmp_ip->pnn] = false;
// lcp2_allocate_unassigned: place unassigned addresses one at a time.
// The outer loop runs while have_unassigned and should_loop are both true.
// Each round examines the unassigned addresses against the destination
// nodes that pass the can_node_serve_ip() and mask tests, computing
//   dstdsum = ip_distance_2_sum(addr, all_ips, dstnode)
//   dstimbl = lcp2_imbalances[dstnode] + dstdsum
// and tests whether the candidate has a smaller dstdsum than the best so far.
// If a candidate was chosen (minnode != -1) the address is assigned and
// lcp2_imbalances[minnode] is updated to minimbl. Addresses still unassigned
// when the loop ends are reported with a DEBUG_WARNING.
// NOTE(review): the declarations of `mask`, `dstnode` and `minnode`, the
// bookkeeping inside the `dstdsum < mindsum` branch and the place where
// should_loop is set back to true are not visible in this listing; confirm
// against the upstream file.
1570 /* Allocate any unassigned addresses using the LCP2 algorithm to find
1571 * the IP/node combination that will cost the least.
1572 * Not static, so we can easily link it into a unit test.
1574 void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
1575 struct ctdb_node_map *nodemap,
1577 struct ctdb_public_ip_list *all_ips,
1578 uint32_t *lcp2_imbalances)
1580 struct ctdb_public_ip_list *tmp_ip;
1584 uint32_t mindsum, dstdsum, dstimbl, minimbl;
1585 struct ctdb_public_ip_list *minip;
1587 bool should_loop = true;
1588 bool have_unassigned = true;
1590 while (have_unassigned && should_loop) {
1591 should_loop = false;
1593 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1594 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
1600 /* loop over each unassigned ip. */
1601 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
// test for addresses that already have an owner
1602 if (tmp_ip->pnn != -1) {
1606 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1607 /* only check nodes that can actually serve this ip */
1608 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1609 /* no it couldnt so skip to the next node */
1612 if (nodemap->nodes[dstnode].flags & mask) {
// cost this address would add on dstnode, and the resulting imbalance
1616 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1617 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1618 DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
1619 ctdb_addr_to_str(&(tmp_ip->addr)),
1621 dstimbl - lcp2_imbalances[dstnode]));
// first candidate, or a cheaper one than the best seen so far
1624 if ((minnode == -1) || (dstdsum < mindsum)) {
1634 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1636 /* If we found one then assign it to the given node. */
1637 if (minnode != -1) {
1638 minip->pnn = minnode;
1639 lcp2_imbalances[minnode] = minimbl;
1640 DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
1641 ctdb_addr_to_str(&(minip->addr)),
1646 /* There might be a better way but at least this is clear. */
// recompute have_unassigned by rescanning the whole list
1647 have_unassigned = false;
1648 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1649 if (tmp_ip->pnn == -1) {
1650 have_unassigned = true;
1655 /* We know if we have an unassigned addresses so we might as
1658 if (have_unassigned) {
1659 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1660 if (tmp_ip->pnn == -1) {
1661 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1662 ctdb_addr_to_str(&tmp_ip->addr)));
// lcp2_failback: one LCP2 rebalancing step.
//   lcp2_imbalances  per-node imbalance values, updated in place when a
//                    move is made
//   newly_healthy    per-node flags; only flagged nodes are considered as
//                    destinations
// Steps visible in the body:
//   1. count the newly healthy nodes and test for zero;
//   2. scan lcp2_imbalances[] for the source node with the largest value
//      (srcnode / maximbl);
//   3. for every address on srcnode and every newly healthy node that
//      passes the can_node_serve_ip() test, compute srcimbl and dstimbl,
//      and remember the combination with the smallest srcimbl + dstimbl
//      that also satisfies dstimbl < maximbl and dstdsum < srcdsum;
//   4. if such a move exists, log it, update both imbalance entries and
//      reassign the address.
// NOTE(review): the return statements and the initialisation of srcnode,
// maximbl, mindstnode and minip are not visible in this listing; the caller
// tests the boolean result. Confirm details against the upstream file.
1668 /* LCP2 algorithm for rebalancing the cluster. This finds the source
1669 * node with the highest LCP2 imbalance, and then determines the best
1670 * IP/destination node combination to move from the source node.
1672 * Not static, so we can easily link it into a unit test.
1674 bool lcp2_failback(struct ctdb_context *ctdb,
1675 struct ctdb_node_map *nodemap,
1677 struct ctdb_public_ip_list *all_ips,
1678 uint32_t *lcp2_imbalances,
1679 bool *newly_healthy)
1681 int srcnode, dstnode, mindstnode, i, num_newly_healthy;
1682 uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
1683 uint32_t minsrcimbl, mindstimbl, b;
1684 struct ctdb_public_ip_list *minip;
1685 struct ctdb_public_ip_list *tmp_ip;
1687 /* It is only worth continuing if we have suitable target
1688 * nodes to transfer IPs to. This check is much cheaper than
1691 num_newly_healthy = 0;
1692 for (i = 0; i < nodemap->num; i++) {
1693 if (newly_healthy[i]) {
1694 num_newly_healthy++;
1697 if (num_newly_healthy == 0) {
1701 /* Get the node with the highest imbalance metric. */
1704 for (i=0; i < nodemap->num; i++) {
1705 b = lcp2_imbalances[i];
1706 if ((srcnode == -1) || (b > maximbl)) {
1712 /* This means that all nodes had 0 or 1 addresses, so can't be
1719 /* Find an IP and destination node that best reduces imbalance. */
1725 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1726 DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));
1728 for (tmp_ip=all_ips; tmp_ip; tmp_ip=tmp_ip->next) {
1729 /* Only consider addresses on srcnode. */
1730 if (tmp_ip->pnn != srcnode) {
1734 /* What is this IP address costing the source node? */
1735 srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
// imbalance srcnode would have after giving this address away
1736 srcimbl = maximbl - srcdsum;
1738 /* Consider this IP address would cost each potential
1739 * destination node. Destination nodes are limited to
1740 * those that are newly healthy, since we don't want
1741 * to do gratuitous failover of IPs just to make minor
1742 * balance improvements.
1744 for (dstnode=0; dstnode < nodemap->num; dstnode++) {
1745 if (! newly_healthy[dstnode]) {
1748 /* only check nodes that can actually serve this ip */
1749 if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
1750 /* no it couldnt so skip to the next node */
1754 dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
1755 dstimbl = lcp2_imbalances[dstnode] + dstdsum;
1756 DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
1757 srcnode, srcimbl - lcp2_imbalances[srcnode],
1758 ctdb_addr_to_str(&(tmp_ip->addr)),
1759 dstnode, dstimbl - lcp2_imbalances[dstnode]));
// accept the move only if the destination ends up below the current
// maximum, the address costs less there than on srcnode, and the combined
// imbalance beats the best candidate so far
1761 if ((dstimbl < maximbl) && (dstdsum < srcdsum) && \
1762 ((mindstnode == -1) || \
1763 ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
1766 minsrcimbl = srcimbl;
1767 mindstnode = dstnode;
1768 mindstimbl = dstimbl;
1772 DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
1774 if (mindstnode != -1) {
1775 /* We found a move that makes things better... */
1776 DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
1777 srcnode, minsrcimbl - lcp2_imbalances[srcnode],
1778 ctdb_addr_to_str(&(minip->addr)),
1779 mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));
// NOTE(review): the value stored on the next line is srcimbl, which at this
// point holds the figure for the last address examined on srcnode, while the
// chosen move was recorded in minsrcimbl (also the value logged above).
// This looks like it should be minsrcimbl -- confirm against upstream.
1782 lcp2_imbalances[srcnode] = srcimbl;
1783 lcp2_imbalances[mindstnode] = mindstimbl;
1784 minip->pnn = mindstnode;
// ctdb_takeover_run_core: decide which node should own each public address.
//   ctdb       daemon context (tunables are read from ctdb->tunable)
//   nodemap    node list with per-node flags
//   all_ips_p  out: receives the merged address list built by
//              create_merged_ip_list(); each entry's pnn holds the result
// Stages visible in the body:
//   - count nodes with neither NODE_FLAGS_INACTIVE nor NODE_FLAGS_DISABLED
//     and choose `mask`: INACTIVE|DISABLED when such nodes exist,
//     INACTIVE alone otherwise;
//   - build the merged list and count its entries;
//   - with deterministic_public_ips == 1, preset pnn to
//     (list index) modulo nodemap->num;
//   - test assigned addresses against the owner node's flags and against
//     can_node_serve_ip();
//   - allocate unassigned addresses (LCP2 or basic variant, chosen by
//     lcp2_public_ip_assignment);
//   - unless no_ip_failback is set, run the matching failback routine.
// NOTE(review): statements on dropped lines (resetting pnn to -1, the retry
// jump after a failback returns true, the release of tmp_ctx) are not
// visible in this listing; confirm against the upstream file.
1793 /* The calculation part of the IP allocation algorithm.
1794 * Not static, so we can easily link it into a unit test.
1796 void ctdb_takeover_run_core(struct ctdb_context *ctdb,
1797 struct ctdb_node_map *nodemap,
1798 struct ctdb_public_ip_list **all_ips_p)
1800 int i, num_healthy, retries, num_ips;
1802 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1803 uint32_t *lcp2_imbalances;
1804 bool *newly_healthy;
// scratch talloc context; handed to lcp2_init() as parent of its arrays
1806 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1808 /* Count how many completely healthy nodes we have */
1810 for (i=0;i<nodemap->num;i++) {
1811 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1816 if (num_healthy > 0) {
1817 /* We have healthy nodes, so only consider them for
1818 serving public addresses
1820 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1822 /* We didnt have any completely healthy nodes so
1823 use "disabled" nodes as a fallback
1825 mask = NODE_FLAGS_INACTIVE;
1828 /* since nodes only know about those public addresses that
1829 can be served by that particular node, no single node has
1830 a full list of all public addresses that exist in the cluster.
1831 Walk over all node structures and create a merged list of
1832 all public addresses that exist in the cluster.
1834 keep the tree of ips around as ctdb->ip_tree
1836 all_ips = create_merged_ip_list(ctdb);
1837 *all_ips_p = all_ips; /* minimal code changes */
1839 /* Count how many ips we have */
1841 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1845 /* If we want deterministic ip allocations, i.e. that the ip addresses
1846 will always be allocated the same way for a specific set of
1847 available/unavailable nodes.
1849 if (1 == ctdb->tunable.deterministic_public_ips) {
1850 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
// round-robin by list position
1851 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1852 tmp_ip->pnn = i%nodemap->num;
1857 /* mark all public addresses with a masked node as being served by
1860 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1861 if (tmp_ip->pnn == -1) {
1864 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1869 /* verify that the assigned nodes can serve that public ip
1870 and set it to -1 if not
1872 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1873 if (tmp_ip->pnn == -1) {
1876 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1877 /* this node can not serve this ip. */
1882 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1883 lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
1886 /* now we must redistribute all public addresses with takeover node
1887 -1 among the nodes available
1891 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1892 lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
1894 basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
1897 /* If we dont want ips to fail back after a node becomes healthy
1898 again, we wont even try to reallocat the ip addresses so that
1899 they are evenly spread out.
1900 This can NOT be used at the same time as DeterministicIPs !
1902 if (1 == ctdb->tunable.no_ip_failback) {
1903 if (1 == ctdb->tunable.deterministic_public_ips) {
1904 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1910 /* now, try to make sure the ip adresses are evenly distributed
1913 if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1914 if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
1918 if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
1923 /* finished distributing the public addresses, now just send the
1924 info out to the nodes
1928 /* at this point ->pnn is the node which will own each IP
1929 or -1 if there is no node that can cover this ip
// ctdb_takeover_run: compute the address assignment and push it to the
// cluster.
// Sequence visible in the body:
//   1. test ctdb->tunable.disable_ip_failover (the comment says only the
//      ipreallocated event is sent in that case);
//   2. ctdb_takeover_run_core() fills all_ips;
//   3. for every node without NODE_FLAGS_DISCONNECTED, and every address
//      whose pnn differs from that node's pnn, queue a
//      CTDB_CONTROL_RELEASE_IPv4 (AF_INET) or CTDB_CONTROL_RELEASE_IP
//      control, then wait for all of them;
//   4. for every address with an owner, queue CTDB_CONTROL_TAKEOVER_IPv4 or
//      CTDB_CONTROL_TAKEOVER_IP to the owner, then wait;
//   5. run CTDB_CONTROL_RUN_EVENTSCRIPTS with the payload "ipreallocated"
//      on the nodes returned by list_of_connected_nodes().
// tmp_ctx owns the async bookkeeping and is freed on the visible error
// paths and at the end.
// NOTE(review): return statements, the declarations of i, data and nodes,
// and the remaining ctdb_control_send() arguments are not visible in this
// listing; confirm against the upstream file.
1936 make any IP alias changes for public addresses that are necessary
1938 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1941 struct ctdb_public_ip ip;
1942 struct ctdb_public_ipv4 ipv4;
1944 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1946 struct timeval timeout;
1947 struct client_async_data *async_data;
1948 struct ctdb_client_control_state *state;
1949 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1952 * ip failover is completely disabled, just send out the
1953 * ipreallocated event.
1955 if (ctdb->tunable.disable_ip_failover != 0) {
1961 /* Do the IP reassignment calculations */
1962 ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
1964 /* now tell all nodes to delete any alias that they should not
1965 have. This will be a NOOP on nodes that don't currently
1966 hold the given alias */
1967 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1968 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1970 for (i=0;i<nodemap->num;i++) {
1971 /* don't talk to unconnected nodes, but do talk to banned nodes */
1972 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1976 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1977 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1978 /* This node should be serving this
1979 vnn so dont tell it to release the ip
// IPv4 addresses use the older ctdb_public_ipv4 payload
1983 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1984 ipv4.pnn = tmp_ip->pnn;
1985 ipv4.sin = tmp_ip->addr.ip;
1987 timeout = TAKEOVER_TIMEOUT();
1988 data.dsize = sizeof(ipv4);
1989 data.dptr = (uint8_t *)&ipv4;
1990 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1991 0, CTDB_CONTROL_RELEASE_IPv4, 0,
// all other families use the generic ctdb_public_ip payload
1995 ip.pnn = tmp_ip->pnn;
1996 ip.addr = tmp_ip->addr;
1998 timeout = TAKEOVER_TIMEOUT();
1999 data.dsize = sizeof(ip);
2000 data.dptr = (uint8_t *)&ip;
2001 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
2002 0, CTDB_CONTROL_RELEASE_IP, 0,
2007 if (state == NULL) {
2008 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
2009 talloc_free(tmp_ctx);
2013 ctdb_client_async_add(async_data, state);
// block until every queued release control has completed
2016 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2017 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
2018 talloc_free(tmp_ctx);
2021 talloc_free(async_data);
2024 /* tell all nodes to get their own IPs */
2025 async_data = talloc_zero(tmp_ctx, struct client_async_data);
2026 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2027 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
2028 if (tmp_ip->pnn == -1) {
2029 /* this IP won't be taken over */
2033 if (tmp_ip->addr.sa.sa_family == AF_INET) {
2034 ipv4.pnn = tmp_ip->pnn;
2035 ipv4.sin = tmp_ip->addr.ip;
2037 timeout = TAKEOVER_TIMEOUT();
2038 data.dsize = sizeof(ipv4);
2039 data.dptr = (uint8_t *)&ipv4;
2040 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2041 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
2045 ip.pnn = tmp_ip->pnn;
2046 ip.addr = tmp_ip->addr;
2048 timeout = TAKEOVER_TIMEOUT();
2049 data.dsize = sizeof(ip);
2050 data.dptr = (uint8_t *)&ip;
2051 state = ctdb_control_send(ctdb, tmp_ip->pnn,
2052 0, CTDB_CONTROL_TAKEOVER_IP, 0,
2056 if (state == NULL) {
2057 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
2058 talloc_free(tmp_ctx);
2062 ctdb_client_async_add(async_data, state);
2064 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2065 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
2066 talloc_free(tmp_ctx);
2071 /* tell all nodes to update natwg */
2072 /* send the flags update natgw on all connected nodes */
// payload is the event name including its terminating NUL
2073 data.dptr = discard_const("ipreallocated");
2074 data.dsize = strlen((char *)data.dptr) + 1;
2075 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2076 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
2077 nodes, 0, TAKEOVER_TIMEOUT(),
2081 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
2084 talloc_free(tmp_ctx);
// talloc destructor for struct ctdb_client_ip (installed with
// talloc_set_destructor() in ctdb_control_tcp_client).
// Logs the address and port at DEBUG_DEBUG and unlinks the entry from
// ip->ctdb->client_ip_list.
// NOTE(review): the return statement is not visible in this listing.
2090 destroy a ctdb_client_ip structure
2092 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
2094 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
2095 ctdb_addr_to_str(&ip->addr),
2096 ntohs(ip->addr.ip.sin_port),
2099 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
// ctdb_control_tcp_client: register a client-owned TCP connection.
//   ctdb       daemon context
//   client_id  request id used to look up the struct ctdb_client
//   indata     either a struct ctdb_control_tcp (old IPv4-only layout) or a
//              struct ctdb_control_tcp_addr, distinguished by indata.dsize
// Steps visible in the body:
//   - convert the old layout into a ctdb_control_tcp_addr when needed;
//   - canonicalize source and destination with ctdb_canonicalize_ip();
//   - look up the destination with find_public_ip_vnn(); the following
//     switch logs an error for an AF_INET destination other than
//     INADDR_LOOPBACK, for an ipv6 destination, or for an unknown family;
//   - log an error if vnn->pnn differs from ctdb->pnn;
//   - allocate a ctdb_client_ip and a ctdb_tcp_list under `client` and link
//     them into ctdb->client_ip_list and client->tcp_list;
//   - broadcast CTDB_CONTROL_TCP_ADD with the connection to the connected
//     nodes (CTDB_CTRL_FLAG_NOREPLY).
// NOTE(review): no NULL check on `client` is visible before client->pid is
// read, and the return statements and case labels are not visible in this
// listing; confirm against the upstream file.
2104 called by a client to inform us of a TCP connection that it is managing
2105 that should tickled with an ACK when IP takeover is done
2106 we handle both the old ipv4 style of packets as well as the new ipv4/6
2109 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
2112 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
2113 struct ctdb_control_tcp *old_addr = NULL;
2114 struct ctdb_control_tcp_addr new_addr;
2115 struct ctdb_control_tcp_addr *tcp_sock = NULL;
2116 struct ctdb_tcp_list *tcp;
2117 struct ctdb_tcp_connection t;
2120 struct ctdb_client_ip *ip;
2121 struct ctdb_vnn *vnn;
2122 ctdb_sock_addr addr;
// the payload size selects the wire format
2124 switch (indata.dsize) {
2125 case sizeof(struct ctdb_control_tcp):
2126 old_addr = (struct ctdb_control_tcp *)indata.dptr;
2127 ZERO_STRUCT(new_addr);
2128 tcp_sock = &new_addr;
2129 tcp_sock->src.ip = old_addr->src;
2130 tcp_sock->dest.ip = old_addr->dest;
2132 case sizeof(struct ctdb_control_tcp_addr):
2133 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
2136 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
2137 "to ctdb_control_tcp_client. size was %d but "
2138 "only allowed sizes are %lu and %lu\n",
2140 (long unsigned)sizeof(struct ctdb_control_tcp),
2141 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
// canonicalize via a temporary copy: addr is the input, tcp_sock the output
2145 addr = tcp_sock->src;
2146 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
2147 addr = tcp_sock->dest;
2148 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
2151 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
2152 vnn = find_public_ip_vnn(ctdb, &addr);
2154 switch (addr.sa.sa_family) {
2156 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
2157 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
2158 ctdb_addr_to_str(&addr)));
2162 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
2163 ctdb_addr_to_str(&addr)));
2166 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
2172 if (vnn->pnn != ctdb->pnn) {
2173 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
2174 ctdb_addr_to_str(&addr),
2175 client_id, client->pid));
2176 /* failing this call will tell smbd to die */
// both records are talloc children of `client`
2180 ip = talloc(client, struct ctdb_client_ip);
2181 CTDB_NO_MEMORY(ctdb, ip);
2185 ip->client_id = client_id;
2186 talloc_set_destructor(ip, ctdb_client_ip_destructor);
2187 DLIST_ADD(ctdb->client_ip_list, ip);
2189 tcp = talloc(client, struct ctdb_tcp_list);
2190 CTDB_NO_MEMORY(ctdb, tcp);
2192 tcp->connection.src_addr = tcp_sock->src;
2193 tcp->connection.dst_addr = tcp_sock->dest;
2195 DLIST_ADD(client->tcp_list, tcp);
// `t` is the payload broadcast to the other nodes below
2197 t.src_addr = tcp_sock->src;
2198 t.dst_addr = tcp_sock->dest;
2200 data.dptr = (uint8_t *)&t;
2201 data.dsize = sizeof(t);
2203 switch (addr.sa.sa_family) {
2205 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2206 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
2207 ctdb_addr_to_str(&tcp_sock->src),
2208 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
2211 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
2212 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
2213 ctdb_addr_to_str(&tcp_sock->src),
2214 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
2217 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
2221 /* tell all nodes about this tcp connection */
2222 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2223 CTDB_CONTROL_TCP_ADD,
2224 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2226 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
// ctdb_tcp_find: linear search of a connection array.
//   array  array to search; a NULL array is tested for explicitly
//   tcp    connection whose src_addr and dst_addr are compared
// Returns a pointer to the first element whose source and destination both
// match according to ctdb_same_sockaddr().
// NOTE(review): the return statements for the NULL-array and not-found
// cases are not visible in this listing (callers compare the result with
// NULL).
2234 find a tcp address on a list
2236 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2237 struct ctdb_tcp_connection *tcp)
2241 if (array == NULL) {
2245 for (i=0;i<array->num;i++) {
2246 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
2247 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
2248 return &array->connections[i];
// ctdb_control_tcp_add: record a connection in the tickle list of the
// public address it targets.
//   ctdb               daemon context
//   indata             payload, interpreted as a struct ctdb_tcp_connection
//   tcp_update_needed  when true, vnn->tcp_update_needed is set after a
//                      connection has been stored
// Steps visible in the body:
//   - find the vnn for the destination address; an unknown address is
//     logged at DEBUG_INFO;
//   - if the vnn has no array yet, allocate one (parented to ctdb->nodes)
//     with room for a single connection and store the entry;
//   - otherwise check for a duplicate via ctdb_tcp_find(), and if it is
//     new, grow the connections array with talloc_realloc() and append.
// NOTE(review): the result of talloc_realloc() is assigned straight back to
// tcparray->connections, so the previous pointer is overwritten if the
// call fails. The counter updates, the new element count passed to
// talloc_realloc() and the return statements are not visible in this
// listing; confirm against the upstream file.
2257 called by a daemon to inform us of a TCP connection that one of its
2258 clients managing that should tickled with an ACK when IP takeover is
2261 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2263 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
2264 struct ctdb_tcp_array *tcparray;
2265 struct ctdb_tcp_connection tcp;
2266 struct ctdb_vnn *vnn;
2268 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
2270 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2271 ctdb_addr_to_str(&p->dst_addr)));
2277 tcparray = vnn->tcp_array;
2279 /* If this is the first tickle */
2280 if (tcparray == NULL) {
2281 tcparray = talloc_size(ctdb->nodes,
2282 offsetof(struct ctdb_tcp_array, connections) +
2283 sizeof(struct ctdb_tcp_connection) * 1);
2284 CTDB_NO_MEMORY(ctdb, tcparray);
2285 vnn->tcp_array = tcparray;
// connections is a separate allocation, child of the array header
2288 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
2289 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2291 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2292 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2295 if (tcp_update_needed) {
2296 vnn->tcp_update_needed = true;
2302 /* Do we already have this tickle ?*/
2303 tcp.src_addr = p->src_addr;
2304 tcp.dst_addr = p->dst_addr;
2305 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
2306 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2307 ctdb_addr_to_str(&tcp.dst_addr),
2308 ntohs(tcp.dst_addr.ip.sin_port),
2313 /* A new tickle, we must add it to the array */
2314 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2315 struct ctdb_tcp_connection,
2317 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2319 vnn->tcp_array = tcparray;
2320 tcparray->connections[tcparray->num].src_addr = p->src_addr;
2321 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
2324 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2325 ctdb_addr_to_str(&tcp.dst_addr),
2326 ntohs(tcp.dst_addr.ip.sin_port),
2329 if (tcp_update_needed) {
2330 vnn->tcp_update_needed = true;
// ctdb_remove_tcp_connection: drop one connection from the tickle list of
// the public address it targets.
//   ctdb  daemon context
//   conn  connection to remove; its dst_addr selects the vnn
// Steps visible in the body:
//   - look up the vnn; an unknown address is logged at DEBUG_ERR;
//   - an empty array or an unknown connection is logged at DEBUG_INFO;
//   - otherwise the last array element is copied over the matching slot and
//     the count is decremented (element order is not preserved);
//   - when the count reaches zero the whole array is freed and
//     vnn->tcp_array is set to NULL;
//   - vnn->tcp_update_needed is set to true.
// NOTE(review): the NULL tests and early returns are not visible in this
// listing; confirm against the upstream file.
2338 called by a daemon to inform us of a TCP connection that one of its
2339 clients managing that should tickled with an ACK when IP takeover is
2342 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
2344 struct ctdb_tcp_connection *tcpp;
2345 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
2348 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
2349 ctdb_addr_to_str(&conn->dst_addr)));
2353 /* if the array is empty we cant remove it
2354 and we dont need to do anything
2356 if (vnn->tcp_array == NULL) {
2357 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2358 ctdb_addr_to_str(&conn->dst_addr),
2359 ntohs(conn->dst_addr.ip.sin_port)));
2364 /* See if we know this connection
2365 if we dont know this connection then we dont need to do anything
2367 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2369 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2370 ctdb_addr_to_str(&conn->dst_addr),
2371 ntohs(conn->dst_addr.ip.sin_port)));
2376 /* We need to remove this entry from the array.
2377 Instead of allocating a new array and copying data to it
2378 we cheat and just copy the last entry in the existing array
2379 to the entry that is to be removed and just shring the
// swap-with-last removal: overwrite the found slot, then shrink the count
2382 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2383 vnn->tcp_array->num--;
2385 /* If we deleted the last entry we also need to remove the entire array
2387 if (vnn->tcp_array->num == 0) {
2388 talloc_free(vnn->tcp_array);
2389 vnn->tcp_array = NULL;
2392 vnn->tcp_update_needed = true;
2394 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2395 ctdb_addr_to_str(&conn->src_addr),
2396 ntohs(conn->src_addr.ip.sin_port)));
// ctdb_control_tcp_remove: control entry point that interprets indata.dptr
// as a struct ctdb_tcp_connection and forwards it to
// ctdb_remove_tcp_connection().
// NOTE(review): indata.dsize is not checked on the visible lines, and the
// return statement is not visible in this listing.
2401 called by a daemon to inform us of a TCP connection that one of its
2402 clients used are no longer needed in the tickle database
2404 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2406 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
2408 ctdb_remove_tcp_connection(ctdb, conn);
// ctdb_control_startup: control handler taking the daemon context and a
// `vnn` number.
// The only visible body content is the XXX note below, i.e. sending the
// tickles is not implemented on the visible lines.
// NOTE(review): the return statement is not visible in this listing.
2415 called when a daemon restarts - send all tickes for all public addresses
2416 we are serving immediately to the new node.
2418 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
2420 /*XXX here we should send all tickes we are serving to the new node */
// ctdb_takeover_client_destructor_hook: drain client->tcp_list.
// Repeatedly takes the list head, unlinks it with DLIST_REMOVE and passes
// its connection to ctdb_remove_tcp_connection() until the list is empty.
// The list nodes themselves are not freed on the visible lines; they are
// allocated as talloc children of the client in ctdb_control_tcp_client.
2426 called when a client structure goes away - hook to remove
2427 elements from the tcp_list in all daemons
2429 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2431 while (client->tcp_list) {
2432 struct ctdb_tcp_list *tcp = client->tcp_list;
2433 DLIST_REMOVE(client->tcp_list, tcp);
2434 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
// ctdb_release_all_ips: walk ctdb->vnn and release the public addresses.
// For a vnn whose address ctdb_sys_have_ip() reports as absent, only
// ctdb_vnn_unassign_iface() is visible. Otherwise the
// CTDB_EVENT_RELEASE_IP event script is run with
// "<iface> <address> <netmask bits>", release_kill_clients() is called for
// the address and the interface is unassigned.
// NOTE(review): the statement that skips to the next vnn after the first
// branch, and any checks between the two branches, are not visible in this
// listing; confirm against the upstream file.
2440 release all IPs on shutdown
2442 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2444 struct ctdb_vnn *vnn;
2446 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2447 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2448 ctdb_vnn_unassign_iface(ctdb, vnn);
2454 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2455 ctdb_vnn_iface_string(vnn),
2456 ctdb_addr_to_str(&vnn->public_address),
2457 vnn->public_netmask_bits);
2458 release_kill_clients(ctdb, &vnn->public_address);
2459 ctdb_vnn_unassign_iface(ctdb, vnn);
// ctdb_control_get_public_ips: build a struct ctdb_all_public_ips reply.
//   ctdb     daemon context
//   c        request; CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE in c->flags
//            restricts the reply to vnns accepted by ctdb_vnn_available()
//   outdata  receives the reply buffer (a talloc child of outdata) and its
//            size
// The buffer is sized for `num` entries from the counting loop, zeroed, and
// filled with (pnn, address) pairs; dsize is then recomputed from the
// number of entries actually written (i).
// NOTE(review): the counter increments and the return statement are not
// visible in this listing.
2465 get list of public IPs
2467 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
2468 struct ctdb_req_control *c, TDB_DATA *outdata)
2471 struct ctdb_all_public_ips *ips;
2472 struct ctdb_vnn *vnn;
2473 bool only_available = false;
2475 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2476 only_available = true;
2479 /* count how many public ip structures we have */
2481 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// header plus one ctdb_public_ip slot per counted vnn
2485 len = offsetof(struct ctdb_all_public_ips, ips) +
2486 num*sizeof(struct ctdb_public_ip);
2487 ips = talloc_zero_size(outdata, len);
2488 CTDB_NO_MEMORY(ctdb, ips);
2491 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2492 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2495 ips->ips[i].pnn = vnn->pnn;
2496 ips->ips[i].addr = vnn->public_address;
// the reported length covers only the i entries that were filled in
2500 len = offsetof(struct ctdb_all_public_ips, ips) +
2501 i*sizeof(struct ctdb_public_ip);
2503 outdata->dsize = len;
2504 outdata->dptr = (uint8_t *)ips;
// ctdb_control_get_public_ipsv4: IPv4-only variant of the public address
// list, using struct ctdb_all_public_ipsv4 / struct ctdb_public_ipv4.
//   ctdb     daemon context
//   c        request (not referenced on the visible lines)
//   outdata  receives the reply buffer (a talloc child of outdata) and its
//            size
// Both loops test the address family against AF_INET; the second loop
// stores pnn and the sockaddr_in of each vnn.
// NOTE(review): the counter increments, the skip statements and the return
// statement are not visible in this listing.
2511 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2513 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2514 struct ctdb_req_control *c, TDB_DATA *outdata)
2517 struct ctdb_all_public_ipsv4 *ips;
2518 struct ctdb_vnn *vnn;
2520 /* count how many public ip structures we have */
2522 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2523 if (vnn->public_address.sa.sa_family != AF_INET) {
2529 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2530 num*sizeof(struct ctdb_public_ipv4);
2531 ips = talloc_zero_size(outdata, len);
2532 CTDB_NO_MEMORY(ctdb, ips);
2534 outdata->dsize = len;
2535 outdata->dptr = (uint8_t *)ips;
2539 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2540 if (vnn->public_address.sa.sa_family != AF_INET) {
2543 ips->ips[i].pnn = vnn->pnn;
2544 ips->ips[i].sin = vnn->public_address.ip;
// ctdb_control_get_public_ip_info: describe one public address and the
// interfaces configured for it.
//   ctdb     daemon context
//   c        request
//   indata   payload, interpreted as a ctdb_sock_addr (its declaration is
//            not visible in this listing)
//   outdata  receives a struct ctdb_control_public_ip_info (talloc child of
//            outdata) and its size
// The address is looked up with find_public_ip_vnn(), with
// ctdb->single_ip_vnn as a fallback when it matches via ctdb_same_ip().
// The reply carries the address, its pnn, and one
// ctdb_control_iface_info per entry of the NULL-terminated vnn->ifaces
// list (name, link state, reference count). active_idx is the index of the
// interface equal to vnn->iface, or 0xFFFFFFFF when none matches.
// NOTE(review): interface names are copied with strcpy(); the size of the
// destination name field is not visible here, so confirm that names cannot
// exceed it. The return statements are not visible in this listing.
2551 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2552 struct ctdb_req_control *c,
2557 ctdb_sock_addr *addr;
2558 struct ctdb_control_public_ip_info *info;
2559 struct ctdb_vnn *vnn;
2561 addr = (ctdb_sock_addr *)indata.dptr;
2563 vnn = find_public_ip_vnn(ctdb, addr);
2565 /* if it is not a public ip it could be our 'single ip' */
2566 if (ctdb->single_ip_vnn) {
2567 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2568 vnn = ctdb->single_ip_vnn;
2573 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2574 "'%s'not a public address\n",
2575 ctdb_addr_to_str(addr)));
2579 /* count how many public ip structures we have */
// vnn->ifaces is walked until its NULL terminator
2581 for (;vnn->ifaces[num];) {
2585 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2586 num*sizeof(struct ctdb_control_iface_info);
2587 info = talloc_zero_size(outdata, len);
2588 CTDB_NO_MEMORY(ctdb, info);
2590 info->ip.addr = vnn->public_address;
2591 info->ip.pnn = vnn->pnn;
// sentinel meaning "no active interface found yet"
2592 info->active_idx = 0xFFFFFFFF;
2594 for (i=0; vnn->ifaces[i]; i++) {
2595 struct ctdb_iface *cur;
2597 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2599 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2603 if (vnn->iface == cur) {
2604 info->active_idx = i;
2606 strcpy(info->ifaces[i].name, cur->name);
2607 info->ifaces[i].link_state = cur->link_up;
2608 info->ifaces[i].references = cur->references;
2611 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2612 i*sizeof(struct ctdb_control_iface_info);
2614 outdata->dsize = len;
2615 outdata->dptr = (uint8_t *)info;
// ctdb_control_get_ifaces: list every interface in ctdb->ifaces.
//   ctdb     daemon context
//   c        request (not referenced on the visible lines)
//   outdata  receives a struct ctdb_control_get_ifaces (talloc child of
//            outdata) and its size; its declaration is not visible in this
//            listing
// Each entry carries the interface name, its link_up state and its
// reference count. The final length is computed from the number of entries
// written (i).
// NOTE(review): names are copied with strcpy(); the size of the destination
// name field is not visible here, so confirm that names cannot exceed it.
// The counter increments and the return statement are not visible in this
// listing.
2620 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2621 struct ctdb_req_control *c,
2625 struct ctdb_control_get_ifaces *ifaces;
2626 struct ctdb_iface *cur;
2628 /* count how many public ip structures we have */
2630 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2634 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2635 num*sizeof(struct ctdb_control_iface_info);
2636 ifaces = talloc_zero_size(outdata, len);
2637 CTDB_NO_MEMORY(ctdb, ifaces);
2640 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2641 strcpy(ifaces->ifaces[i].name, cur->name);
2642 ifaces->ifaces[i].link_state = cur->link_up;
2643 ifaces->ifaces[i].references = cur->references;
2647 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2648 i*sizeof(struct ctdb_control_iface_info);
2650 outdata->dsize = len;
2651 outdata->dptr = (uint8_t *)ifaces;
// ctdb_control_set_iface_link: update the recorded link state of one
// interface.
//   ctdb    daemon context
//   c       request (not referenced on the visible lines)
//   indata  payload, interpreted as a struct ctdb_control_iface_info (its
//           declaration is not visible in this listing)
// Validation visible in the body:
//   - info->name[CTDB_IFACE_SIZE] must be NUL, otherwise the name is logged
//     as not terminated;
//   - info->link_state is checked by a switch that logs invalid values;
//   - info->references must be 0.
// The interface is then looked up by name; when the requested state differs
// from iface->link_up the change is logged (DEBUG_ERR when the link was up
// before, DEBUG_NOTICE otherwise) and iface->link_up is updated.
// NOTE(review): the switch cases that set link_up and the return
// statements are not visible in this listing.
2656 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2657 struct ctdb_req_control *c,
2660 struct ctdb_control_iface_info *info;
2661 struct ctdb_iface *iface;
2662 bool link_up = false;
2664 info = (struct ctdb_control_iface_info *)indata.dptr;
2666 if (info->name[CTDB_IFACE_SIZE] != '\0') {
// bounded length so the unterminated name can still be printed safely
2667 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2668 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2669 len, len, info->name));
2673 switch (info->link_state) {
2681 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2682 (unsigned int)info->link_state));
2686 if (info->references != 0) {
2687 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2688 (unsigned int)info->references));
2692 iface = ctdb_find_iface(ctdb, info->name);
2693 if (iface == NULL) {
// test for "requested state equals the recorded state"
2697 if (link_up == iface->link_up) {
2701 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2702 ("iface[%s] has changed it's link status %s => %s\n",
2704 iface->link_up?"up":"down",
2705 link_up?"up":"down"));
2707 iface->link_up = link_up;
// struct ctdb_kill_tcp: per-vnn state for killing TCP connections.
// Visible members: the owning vnn, the daemon context, the fd event (fde)
// and `connections`, a trbt tree of the connections being killed, which
// capture_tcp_handler searches with keys from killtcp_key().
// NOTE(review): capture_tcp_handler also reads capture_fd and private_data
// members; their declarations are not visible in this listing.
2713 structure containing the listening socket and the list of tcp connections
2714 that the ctdb daemon is to kill
2716 struct ctdb_kill_tcp {
2717 struct ctdb_vnn *vnn;
2718 struct ctdb_context *ctdb;
2720 struct fd_event *fde;
2721 trbt_tree_t *connections;
// struct ctdb_killtcp_con: one connection queued for killing.
// Visible members: source and destination socket addresses and a back
// pointer to the owning ctdb_kill_tcp.
// NOTE(review): tickle_connection_traverse reads a `count` member; its
// declaration is not visible in this listing.
2726 a tcp connection that is to be killed
2728 struct ctdb_killtcp_con {
2729 ctdb_sock_addr src_addr;
2730 ctdb_sock_addr dst_addr;
2732 struct ctdb_kill_tcp *killtcp;
// killtcp_key: build the KILLTCP_KEYLEN-word lookup key for a socket pair.
//   src, dst  the two endpoints; the body tests that they share one
//             address family and logs an error otherwise
// Key layout as written below:
//   first family branch: words 0-3 = dst address, src address, dst port,
//     src port (sockaddr_in fields); remaining words stay zero
//   second family branch: words 0-7 = the four 32-bit address words of dst
//     and src interleaved, highest index first; words 8-9 = dst port,
//     src port (sockaddr_in6 fields)
// The key lives in a function-local static array that is zeroed on every
// call, so the returned pointer is only valid until the next call and the
// function is not reentrant.
// NOTE(review): the case labels and return statements are not visible in
// this listing; values are stored as found in the sockaddr structures,
// without byte-order conversion.
2735 /* this function is used to create a key to represent this socketpair
2736 in the killtcp tree.
2737 this key is used to insert and lookup matching socketpairs that are
2738 to be tickled and RST
2740 #define KILLTCP_KEYLEN 10
2741 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2743 static uint32_t key[KILLTCP_KEYLEN];
2745 bzero(key, sizeof(key));
2747 if (src->sa.sa_family != dst->sa.sa_family) {
2748 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2752 switch (src->sa.sa_family) {
2754 key[0] = dst->ip.sin_addr.s_addr;
2755 key[1] = src->ip.sin_addr.s_addr;
2756 key[2] = dst->ip.sin_port;
2757 key[3] = src->ip.sin_port;
2760 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2761 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2762 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2763 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2764 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2765 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2766 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2767 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2768 key[8] = dst->ip6.sin6_port;
2769 key[9] = src->ip6.sin6_port;
2772 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
// capture_tcp_handler: fd-event callback for the capture socket.
//   ev, fde       event context and fd event (not referenced on the
//                 visible lines)
//   flags         event flags; EVENT_FD_READ is tested first
//   private_data  the struct ctdb_kill_tcp this handler belongs to
// Reads one packet with ctdb_sys_read_tcp_packet() to obtain the endpoints
// and the ack_seq/seq numbers, looks the pair up in killtcp->connections
// using killtcp_key(&src, &dst), and for a known connection sends a TCP
// packet from con->dst_addr to con->src_addr via ctdb_sys_send_tcp() with
// the captured sequence numbers and a final argument of 1 (the log message
// describes this as a reset).
// NOTE(review): the early returns and the removal of the connection from
// the tree are not visible in this listing.
2780 called when we get a read event on the raw socket
2782 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2783 uint16_t flags, void *private_data)
2785 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2786 struct ctdb_killtcp_con *con;
2787 ctdb_sock_addr src, dst;
2788 uint32_t ack_seq, seq;
2790 if (!(flags & EVENT_FD_READ)) {
2794 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2795 killtcp->private_data,
2797 &ack_seq, &seq) != 0) {
2798 /* probably a non-tcp ACK packet */
2802 /* check if we have this guy in our list of connections
2805 con = trbt_lookuparray32(killtcp->connections,
2806 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2808 /* no this was some other packet we can just ignore */
2812 /* This one has been tickled !
2813 now reset him and remove him from the list.
2815 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2816 ntohs(con->dst_addr.ip.sin_port),
2817 ctdb_addr_to_str(&con->src_addr),
2818 ntohs(con->src_addr.ip.sin_port)));
2820 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2825 /* when traversing the list of all tcp connections to send tickle acks to
2826 (so that we can capture the ack coming back and kill the connection
2828 this callback is called for each connection we are currently trying to kill
// Tree traversal callback run for every connection that is still being killed.
//   param - talloc context that collects entries to delete (the caller passes
//           its delete_cons context)
//   data  - the struct ctdb_killtcp_con stored in the tree
// Entries whose count has reached 5 are reparented onto param so the caller
// can free them once the traversal is over; other entries are tickled again.
// NOTE(review): the count increment and the head of the tickle call are not
// visible here; only two of its address arguments are.
2830 static void tickle_connection_traverse(void *param, void *data)
2832 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2834 /* have tried too many times, just give up */
2835 if (con->count >= 5) {
2836 /* can't delete in traverse: reparent to delete_cons */
2837 talloc_steal(param, con);
2841 /* otherwise, try tickling it again */
2844 (ctdb_sock_addr *)&con->dst_addr,
2845 (ctdb_sock_addr *)&con->src_addr,
2851 called every second until all sentenced connections have been reset
// Timer callback that re-tickles every connection still scheduled to be killed.
//   ev, te, t    - not referenced in the visible lines
//   private_data - the struct ctdb_kill_tcp being serviced
// While connections remain, the callback re-arms itself one second ahead.
2853 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2854 struct timeval t, void *private_data)
2856 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
// temporary talloc parent for entries the traverse callback gives up on
2857 void *delete_cons = talloc_new(NULL);
2859 /* loop over all connections sending tickle ACKs */
2860 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2862 /* now we've finished traverse, it's safe to do deletion. */
2863 talloc_free(delete_cons);
2865 /* If there are no more connections to kill we can remove the
2866 entire killtcp structure
2868 if ( (killtcp->connections == NULL) ||
2869 (killtcp->connections->root == NULL) ) {
2870 talloc_free(killtcp);
// NOTE(review): the statement that leaves the function after this free is
// not visible here; the timer below is parented on killtcp, so confirm it
// is not reached once killtcp has been freed
2874 /* try tickling them again in a seconds time
2876 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2877 ctdb_tickle_sentenced_connections, killtcp);
2881 destroy the killtcp structure
// talloc destructor for struct ctdb_kill_tcp; it is installed by
// ctdb_killtcp_add_connection.
// It clears the killtcp pointer of the owning vnn so that the vnn does not
// keep referring to freed state.
// NOTE(review): any guard on killtcp->vnn and the return value are not
// visible here.
2883 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2886 killtcp->vnn->killtcp = NULL;
2892 /* nothing fancy here, just unconditionally replace any existing
2893 connection structure with the new one.
2895 dont even free the old one if it did exist, that one is talloc_stolen
2896 by the same node in the tree anyway and will be deleted when the new data
// Insert callback handed to trbt_insertarray32_callback by
// ctdb_killtcp_add_connection, which passes the new connection as the
// callback argument.
// NOTE(review): the body is not visible here; which of parm and data carries
// the new entry, and what is returned, must be confirmed against the tree API.
2899 static void *add_killtcp_callback(void *parm, void *data)
2905 add a tcp socket to the list of connections we want to RST
// Schedule one TCP connection to be reset.
// The two endpoints are canonicalized, the vnn serving the connection is
// located, the per-vnn kill state is created on first use, the connection is
// stored in the kill state tree, and the capture socket, fd event and tickle
// timer are set up.
//   ctdb - daemon context
// NOTE(review): the remaining parameters are not visible in the signature;
// the body reads them as s and d, presumably the source and destination
// addresses supplied by ctdb_control_kill_tcp.
// NOTE(review): the return statements, the NULL checks on vnn and the label
// of the failure path are not visible here.
2907 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2911 ctdb_sock_addr src, dst;
2912 struct ctdb_kill_tcp *killtcp;
2913 struct ctdb_killtcp_con *con;
2914 struct ctdb_vnn *vnn;
2916 ctdb_canonicalize_ip(s, &src);
2917 ctdb_canonicalize_ip(d, &dst);
// look the vnn up by destination first, then by source
2919 vnn = find_public_ip_vnn(ctdb, &dst);
2921 vnn = find_public_ip_vnn(ctdb, &src);
2924 /* if it is not a public ip it could be our 'single ip' */
2925 if (ctdb->single_ip_vnn) {
2926 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2927 vnn = ctdb->single_ip_vnn;
2932 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2936 killtcp = vnn->killtcp;
2938 /* If this is the first connection to kill we must allocate
2941 if (killtcp == NULL) {
2942 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2943 CTDB_NO_MEMORY(ctdb, killtcp);
2946 killtcp->ctdb = ctdb;
// -1 marks the capture socket as not opened yet
2947 killtcp->capture_fd = -1;
2948 killtcp->connections = trbt_create(killtcp, 0);
2950 vnn->killtcp = killtcp;
2951 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2956 /* create a structure that describes this connection we want to
2957 RST and store it in killtcp->connections
2959 con = talloc(killtcp, struct ctdb_killtcp_con);
2960 CTDB_NO_MEMORY(ctdb, con);
2961 con->src_addr = src;
2962 con->dst_addr = dst;
2964 con->killtcp = killtcp;
// the key is built as (dst, src) here, while capture_tcp_handler looks up
// (src, dst) of the captured packet
2967 trbt_insertarray32_callback(killtcp->connections,
2968 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2969 add_killtcp_callback, con);
2972 If we dont have a socket to listen on yet we must create it
2974 if (killtcp->capture_fd == -1) {
2975 const char *iface = ctdb_vnn_iface_string(vnn);
2976 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2977 if (killtcp->capture_fd == -1) {
2978 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2979 "socket on iface '%s' for killtcp (%s)\n",
2980 iface, strerror(errno)));
2986 if (killtcp->fde == NULL) {
2987 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2989 capture_tcp_handler, killtcp);
2990 tevent_fd_set_auto_close(killtcp->fde);
2992 /* We also need to set up some events to tickle all these connections
2993 until they are all reset
2995 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2996 ctdb_tickle_sentenced_connections, killtcp);
2999 /* tickle him once now */
// NOTE(review): the immediate tickle call is not visible here.
// Cleanup: drop the whole kill state of this vnn.
3008 talloc_free(vnn->killtcp);
3009 vnn->killtcp = NULL;
3014 kill a TCP connection.
// Control entry point: interpret indata as a struct ctdb_control_killtcp and
// schedule that connection to be reset.
// Returns the result of ctdb_killtcp_add_connection.
// NOTE(review): no check of indata.dsize is visible before indata.dptr is
// cast and dereferenced.
3016 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
3018 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
3020 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
3024 called by a daemon to inform us of the entire list of TCP tickles for
3025 a particular public address.
3026 this control should only be sent by the node that is currently serving
3027 that public address.
// Control entry point: replace the stored tickle list of one public address
// with the list received in indata.
//   ctdb   - daemon context
//   indata - a struct ctdb_control_tcp_tickle_list followed by
//            tickles.num connection entries
// NOTE(review): the return statements and the NULL check on vnn are not
// visible here.
3029 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
3031 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
3032 struct ctdb_tcp_array *tcparray;
3033 struct ctdb_vnn *vnn;
3035 /* We must at least have tickles.num or else we cant verify the size
3036 of the received data blob
3038 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3039 tickles.connections)) {
3040 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
3044 /* verify that the size of data matches what we expect */
// NOTE(review): the comparison is a lower bound, so trailing bytes are
// accepted, and the multiplication by tickles.num is not checked for overflow
3045 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
3046 tickles.connections)
3047 + sizeof(struct ctdb_tcp_connection)
3048 * list->tickles.num) {
3049 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
3053 vnn = find_public_ip_vnn(ctdb, &list->addr);
3055 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
3056 ctdb_addr_to_str(&list->addr)));
3061 /* remove any old ticklelist we might have */
3062 talloc_free(vnn->tcp_array);
3063 vnn->tcp_array = NULL;
// the new array is first parented on ctdb->nodes and moved onto the vnn at
// the end
3065 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
3066 CTDB_NO_MEMORY(ctdb, tcparray);
3068 tcparray->num = list->tickles.num;
3070 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
3071 CTDB_NO_MEMORY(ctdb, tcparray->connections);
3073 memcpy(tcparray->connections, &list->tickles.connections[0],
3074 sizeof(struct ctdb_tcp_connection)*tcparray->num);
3076 /* We now have a new fresh tickle list array for this vnn */
3077 vnn->tcp_array = talloc_steal(vnn, tcparray);
3083 called to return the full list of tickles for the public address associated
3084 with the provided vnn
// Control entry point: return the stored tickle list of one public address.
//   ctdb    - daemon context
//   indata  - interpreted as the ctdb_sock_addr of the public address
//   outdata - receives a newly allocated struct ctdb_control_tcp_tickle_list
//             followed by num connection entries; the buffer is a talloc
//             child of outdata
// NOTE(review): no check of indata.dsize is visible before the cast, and the
// declaration of num, any guard for a NULL vnn->tcp_array, the assignment of
// the address field and the return statements are not visible here.
3086 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
3088 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
3089 struct ctdb_control_tcp_tickle_list *list;
3090 struct ctdb_tcp_array *tcparray;
3092 struct ctdb_vnn *vnn;
3094 vnn = find_public_ip_vnn(ctdb, addr);
3096 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
3097 ctdb_addr_to_str(addr)));
3102 tcparray = vnn->tcp_array;
3104 num = tcparray->num;
// reply size: fixed header up to the connections array plus num entries
3109 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3110 tickles.connections)
3111 + sizeof(struct ctdb_tcp_connection) * num;
3113 outdata->dptr = talloc_size(outdata, outdata->dsize);
3114 CTDB_NO_MEMORY(ctdb, outdata->dptr);
3115 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
3118 list->tickles.num = num;
3120 memcpy(&list->tickles.connections[0], tcparray->connections,
3121 sizeof(struct ctdb_tcp_connection) * num);
3129 set the list of all tcp tickles for a public address
// Send the tickle list of one public address as a
// CTDB_CONTROL_SET_TCP_TICKLE_LIST control.
//   ctdb     - daemon context
//   timeout  - not referenced in the visible lines
//   destnode - not referenced in the visible lines; the visible send call
//              targets CTDB_BROADCAST_CONNECTED directly
//   addr     - public address the list belongs to
//   tcparray - connections to send
// The control is sent with CTDB_CTRL_FLAG_NOREPLY, and the marshalling
// buffer, allocated on ctdb, is freed afterwards.
// NOTE(review): the declarations of ret, data and num, any guard for a NULL
// tcparray, the assignment of the address field and the return statements
// are not visible here; confirm data.dptr is also freed when the send fails.
3131 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
3132 struct timeval timeout, uint32_t destnode,
3133 ctdb_sock_addr *addr,
3134 struct ctdb_tcp_array *tcparray)
3138 struct ctdb_control_tcp_tickle_list *list;
3141 num = tcparray->num;
// payload size: fixed header up to the connections array plus num entries
3146 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
3147 tickles.connections) +
3148 sizeof(struct ctdb_tcp_connection) * num;
3149 data.dptr = talloc_size(ctdb, data.dsize);
3150 CTDB_NO_MEMORY(ctdb, data.dptr);
3152 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
3154 list->tickles.num = num;
3156 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
3159 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
3160 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
3161 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
3163 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
3167 talloc_free(data.dptr);
3174 perform tickle updates if required
// Periodic timer callback that pushes tickle list updates to other nodes.
//   ev, te, t    - not referenced in the visible lines
//   private_data - the struct ctdb_context
// It walks every vnn, tests whether the vnn pnn differs from the local pnn
// and whether tcp_update_needed is set, and sends the tickle list with
// ctdb_ctrl_set_tcp_tickles. The callback then re-arms itself after
// tunable.tickle_update_interval seconds.
// NOTE(review): the bodies of the two filter conditions, the first and last
// arguments of the send call and any reset of tcp_update_needed are not
// visible here.
3176 static void ctdb_update_tcp_tickles(struct event_context *ev,
3177 struct timed_event *te,
3178 struct timeval t, void *private_data)
3180 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
3182 struct ctdb_vnn *vnn;
3184 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3185 /* we only send out updates for public addresses that
3188 if (ctdb->pnn != vnn->pnn) {
3191 /* We only send out the updates if we need to */
3192 if (!vnn->tcp_update_needed) {
3195 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
3197 CTDB_BROADCAST_CONNECTED,
3198 &vnn->public_address,
3201 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
3202 ctdb_addr_to_str(&vnn->public_address)));
// schedule the next run; the timer is parented on tickle_update_context
3206 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3207 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3208 ctdb_update_tcp_tickles, ctdb);
3213 start periodic update of tcp tickles
// Start the periodic tickle update.
// A fresh talloc context is stored in ctdb->tickle_update_context and the
// first run of ctdb_update_tcp_tickles is scheduled
// tunable.tickle_update_interval seconds ahead, parented on that context.
3215 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
3217 ctdb->tickle_update_context = talloc_new(ctdb);
3219 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
3220 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
3221 ctdb_update_tcp_tickles, ctdb);
// State of one scheduled series of gratuitous ARP transmissions.
// NOTE(review): iface and count members are accessed through this struct
// elsewhere in the file, but their declarations are not visible here.
3227 struct control_gratious_arp {
// daemon context; its event context is used to schedule the next send
3228 struct ctdb_context *ctdb;
// address that is announced
3229 ctdb_sock_addr addr;
3235 send a gratuitous arp
// Timer callback that sends one gratuitous ARP and schedules the next one.
//   ev, te, t    - not referenced in the visible lines
//   private_data - the struct control_gratious_arp describing the series
// The ARP is sent with ctdb_sys_send_arp on arp->iface; a failure is logged
// together with strerror(errno). The visible lines compare arp->count with
// CTDB_ARP_REPEAT and re-arm the timer CTDB_ARP_INTERVAL seconds ahead.
// NOTE(review): the declaration of ret, the count increment and the
// statements that end the series (presumably freeing arp) are not visible here.
3237 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
3238 struct timeval t, void *private_data)
3241 struct control_gratious_arp *arp = talloc_get_type(private_data,
3242 struct control_gratious_arp);
3244 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
3246 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
3247 arp->iface, strerror(errno)));
3252 if (arp->count == CTDB_ARP_REPEAT) {
3257 event_add_timed(arp->ctdb->ev, arp,
3258 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
3259 send_gratious_arp, arp);
// Control entry point: start a series of gratuitous ARPs for the address and
// interface named in indata.
//   ctdb   - daemon context
//   indata - a struct ctdb_control_gratious_arp whose iface field holds
//            gratious_arp->len bytes
// The request is copied into a struct control_gratious_arp allocated on ctdb
// and the first transmission is scheduled immediately via timeval_zero().
// NOTE(review): the operator of the second size comparison, the
// initialisation of arp->ctdb and arp->count and the return statements are
// not visible here. The interface name is duplicated with talloc_strdup, so
// confirm it is NUL terminated within len bytes.
3266 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
3268 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
3269 struct control_gratious_arp *arp;
3271 /* verify the size of indata */
3272 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
3273 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
3274 (unsigned)indata.dsize,
3275 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
3279 ( offsetof(struct ctdb_control_gratious_arp, iface)
3280 + gratious_arp->len ) ){
3282 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3283 "but should be %u bytes\n",
3284 (unsigned)indata.dsize,
3285 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
3290 arp = talloc(ctdb, struct control_gratious_arp);
3291 CTDB_NO_MEMORY(ctdb, arp);
3294 arp->addr = gratious_arp->addr;
3295 arp->iface = talloc_strdup(arp, gratious_arp->iface);
3296 CTDB_NO_MEMORY(ctdb, arp->iface);
3299 event_add_timed(arp->ctdb->ev, arp,
3300 timeval_zero(), send_gratious_arp, arp);
// Control entry point: add a public address described by indata.
//   ctdb   - daemon context
//   indata - a struct ctdb_control_ip_iface with address, mask and interface
// After the size checks the request is handed to ctdb_add_public_address and
// a failure is logged.
// NOTE(review): the declaration of ret, the head of the second size
// comparison and the return statements are not visible here.
3305 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3307 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3310 /* verify the size of indata */
3311 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3312 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3316 ( offsetof(struct ctdb_control_ip_iface, iface)
3319 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3320 "but should be %u bytes\n",
3321 (unsigned)indata.dsize,
3322 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3326 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
3329 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
3337 called when releaseip event finishes for del_public_address
// Completion callback for the release-ip event script started by
// ctdb_control_del_public_address.
// It frees private_data; the caller passes the temporary talloc context that
// holds the removed vnn.
//   ctdb, status - not referenced in the visible lines
// NOTE(review): the tail of the parameter list is not visible here.
3339 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
3342 talloc_free(private_data);
// Control entry point: remove the public address named in indata.
//   ctdb   - daemon context
//   indata - a struct ctdb_control_ip_iface; only the address is used to
//            find the vnn in the visible lines
// The matching vnn is unlinked from ctdb->vnn and detached from its
// interface. The visible lines then move it onto a temporary talloc context
// and run the CTDB_EVENT_RELEASE_IP event script, with delete_ip_callback
// freeing that context on completion.
// NOTE(review): the declaration of ret, the head of the second size
// comparison, the body of the pnn comparison, the format string of the
// script call and the return statements are not visible here.
3345 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
3347 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
3348 struct ctdb_vnn *vnn;
3351 /* verify the size of indata */
3352 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
3353 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
3357 ( offsetof(struct ctdb_control_ip_iface, iface)
3360 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
3361 "but should be %u bytes\n",
3362 (unsigned)indata.dsize,
3363 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
3367 /* walk over all public addresses until we find a match */
3368 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
3369 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
3370 TALLOC_CTX *mem_ctx;
3372 DLIST_REMOVE(ctdb->vnn, vnn);
3373 if (vnn->iface != NULL) {
3374 ctdb_vnn_unassign_iface(ctdb, vnn);
3376 if (vnn->pnn != ctdb->pnn) {
// the vnn becomes a child of mem_ctx so that it stays valid until the event
// script callback frees mem_ctx
3381 mem_ctx = talloc_new(ctdb);
3382 talloc_steal(mem_ctx, vnn);
3383 ret = ctdb_event_script_callback(ctdb,
3384 mem_ctx, delete_ip_callback, mem_ctx,
3386 CTDB_EVENT_RELEASE_IP,
// NOTE(review): the interface string is computed after
// ctdb_vnn_unassign_iface has run; if that call clears vnn->iface the script
// would not receive the real interface name - confirm
3388 ctdb_vnn_iface_string(vnn),
3389 ctdb_addr_to_str(&vnn->public_address),
3390 vnn->public_netmask_bits);
3401 /* This function is called from the recovery daemon to verify that a remote
3402 node has the expected ip allocation.
3403 This is verified against ctdb->ip_tree
// Compare the public ip allocation reported by a remote node with the
// expected allocation held in ctdb->ip_tree.
//   ctdb - daemon context holding ip_tree
//   ips  - list of addresses and owning pnn values reported by the remote node
// Each reported address is looked up in the tree. A missing entry and a pnn
// mismatch are both logged; entries where either side has pnn -1 are tested
// before the mismatch comparison.
// NOTE(review): the declaration of i, the bodies of the branches and every
// return value are not visible here.
3405 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
3407 struct ctdb_public_ip_list *tmp_ip;
3410 if (ctdb->ip_tree == NULL) {
3411 /* dont know the expected allocation yet, assume remote node
3420 for (i=0; i<ips->num; i++) {
3421 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
3422 if (tmp_ip == NULL) {
3423 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
3427 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
3431 if (tmp_ip->pnn != ips->ips[i].pnn) {
3432 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
// Record a new owner for one public address in ctdb->ip_tree.
//   ctdb - daemon context holding ip_tree
//   ip   - address and the pnn that now holds it
// A missing tree and a missing record are both logged as errors; otherwise
// the change is logged and the stored pnn is overwritten.
// NOTE(review): the return statements are not visible here.
3440 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3442 struct ctdb_public_ip_list *tmp_ip;
3444 if (ctdb->ip_tree == NULL) {
3445 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3449 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3450 if (tmp_ip == NULL) {
3451 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3455 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3456 tmp_ip->pnn = ip->pnn;