4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Search the interface list for an entry with this name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
336 if (status == -ETIME) {
339 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340 ctdb_addr_to_str(&state->vnn->public_address),
341 ctdb_vnn_iface_string(state->vnn)));
342 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
347 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
354 /* the control succeeded */
355 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
361 take over an ip address
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364 struct ctdb_req_control *c,
365 struct ctdb_vnn *vnn)
368 struct ctdb_do_takeip_state *state;
370 ret = ctdb_vnn_assign_iface(ctdb, vnn);
372 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373 "assin a usable interface\n",
374 ctdb_addr_to_str(&vnn->public_address),
375 vnn->public_netmask_bits));
379 state = talloc(vnn, struct ctdb_do_takeip_state);
380 CTDB_NO_MEMORY(ctdb, state);
382 state->c = talloc_steal(ctdb, c);
385 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386 ctdb_addr_to_str(&vnn->public_address),
387 vnn->public_netmask_bits,
388 ctdb_vnn_iface_string(vnn)));
390 ret = ctdb_event_script_callback(ctdb,
392 ctdb_do_takeip_callback,
397 ctdb_vnn_iface_string(vnn),
398 ctdb_addr_to_str(&vnn->public_address),
399 vnn->public_netmask_bits);
402 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403 ctdb_addr_to_str(&vnn->public_address),
404 ctdb_vnn_iface_string(vnn)));
412 struct ctdb_do_updateip_state {
413 struct ctdb_req_control *c;
414 struct ctdb_iface *old;
415 struct ctdb_vnn *vnn;
419 called when updateip event finishes
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
424 struct ctdb_do_updateip_state *state =
425 talloc_get_type(private_data, struct ctdb_do_updateip_state);
429 if (status == -ETIME) {
432 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433 ctdb_addr_to_str(&state->vnn->public_address),
435 ctdb_vnn_iface_string(state->vnn)));
438 * All we can do is reset the old interface
439 * and let the next run fix it
441 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442 state->vnn->iface = state->old;
443 state->vnn->iface->references++;
445 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
450 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
452 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
457 /* the control succeeded */
458 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
464 update (move) an ip address
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467 struct ctdb_req_control *c,
468 struct ctdb_vnn *vnn)
471 struct ctdb_do_updateip_state *state;
472 struct ctdb_iface *old = vnn->iface;
474 ctdb_vnn_unassign_iface(ctdb, vnn);
475 ret = ctdb_vnn_assign_iface(ctdb, vnn);
477 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478 "assin a usable interface (old iface '%s')\n",
479 ctdb_addr_to_str(&vnn->public_address),
480 vnn->public_netmask_bits,
485 if (vnn->iface == old) {
486 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487 "assin a same interface '%s'\n",
488 ctdb_addr_to_str(&vnn->public_address),
489 vnn->public_netmask_bits,
494 state = talloc(vnn, struct ctdb_do_updateip_state);
495 CTDB_NO_MEMORY(ctdb, state);
497 state->c = talloc_steal(ctdb, c);
501 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502 "interface %s to %s\n",
503 ctdb_addr_to_str(&vnn->public_address),
504 vnn->public_netmask_bits,
506 ctdb_vnn_iface_string(vnn)));
508 ret = ctdb_event_script_callback(ctdb,
510 ctdb_do_updateip_callback,
513 CTDB_EVENT_UPDATE_IP,
516 ctdb_vnn_iface_string(vnn),
517 ctdb_addr_to_str(&vnn->public_address),
518 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 old->name, ctdb_vnn_iface_string(vnn)));
531 Find the vnn of the node that has a public ip address
532 returns NULL if the address is not known as a public address
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
536 struct ctdb_vnn *vnn;
538 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539 if (ctdb_same_ip(&vnn->public_address, addr)) {
548 take over an ip address
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551 struct ctdb_req_control *c,
556 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557 struct ctdb_vnn *vnn;
558 bool have_ip = false;
559 bool do_updateip = false;
560 bool do_takeip = false;
561 struct ctdb_iface *best_iface = NULL;
563 if (pip->pnn != ctdb->pnn) {
564 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565 "with pnn %d, but we're node %d\n",
566 ctdb_addr_to_str(&pip->addr),
567 pip->pnn, ctdb->pnn));
571 /* update our vnn list */
572 vnn = find_public_ip_vnn(ctdb, &pip->addr);
574 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575 ctdb_addr_to_str(&pip->addr)));
579 have_ip = ctdb_sys_have_ip(&pip->addr);
580 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581 if (best_iface == NULL) {
582 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583 "a usable interface (old %s, have_ip %d)\n",
584 ctdb_addr_to_str(&vnn->public_address),
585 vnn->public_netmask_bits,
586 ctdb_vnn_iface_string(vnn),
591 if (vnn->iface == NULL && have_ip) {
592 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
593 "but we have no interface assigned, has someone manually configured it?"
595 ctdb_addr_to_str(&vnn->public_address)));
600 if (vnn->pnn != ctdb->pnn && have_ip) {
601 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
602 "and we have it on iface[%s], but it was assigned to node %d"
603 "and we are node %d, banning ourself\n",
604 ctdb_addr_to_str(&vnn->public_address),
605 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
611 if (vnn->iface->link_up) {
612 /* only move when the rebalance gains something */
613 if (vnn->iface->references > (best_iface->references + 1)) {
616 } else if (vnn->iface != best_iface) {
623 ctdb_vnn_unassign_iface(ctdb, vnn);
630 ret = ctdb_do_takeip(ctdb, c, vnn);
634 } else if (do_updateip) {
635 ret = ctdb_do_updateip(ctdb, c, vnn);
641 * The interface is up and the kernel knows the ip
644 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
645 ctdb_addr_to_str(&pip->addr),
646 vnn->public_netmask_bits,
647 ctdb_vnn_iface_string(vnn)));
651 /* tell ctdb_control.c that we will be replying asynchronously */
658 takeover an ip address old v4 style
660 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
661 struct ctdb_req_control *c,
667 data.dsize = sizeof(struct ctdb_public_ip);
668 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
669 CTDB_NO_MEMORY(ctdb, data.dptr);
671 memcpy(data.dptr, indata.dptr, indata.dsize);
672 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
676 kill any clients that are registered with an IP that is being released
678 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
680 struct ctdb_client_ip *ip;
682 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
683 ctdb_addr_to_str(addr)));
685 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
686 ctdb_sock_addr tmp_addr;
689 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
691 ctdb_addr_to_str(&ip->addr)));
693 if (ctdb_same_ip(&tmp_addr, addr)) {
694 struct ctdb_client *client = ctdb_reqid_find(ctdb,
697 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
699 ctdb_addr_to_str(&ip->addr),
702 if (client->pid != 0) {
703 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
704 (unsigned)client->pid,
705 ctdb_addr_to_str(addr),
707 kill(client->pid, SIGKILL);
714 called when releaseip event finishes
716 static void release_ip_callback(struct ctdb_context *ctdb, int status,
719 struct takeover_callback_state *state =
720 talloc_get_type(private_data, struct takeover_callback_state);
723 if (status == -ETIME) {
727 /* send a message to all clients of this node telling them
728 that the cluster has been reconfigured and they should
729 release any sockets on this IP */
730 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
731 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
732 data.dsize = strlen((char *)data.dptr)+1;
734 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
736 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
738 /* kill clients that have registered with this IP */
739 release_kill_clients(ctdb, state->addr);
741 ctdb_vnn_unassign_iface(ctdb, state->vnn);
743 /* the control succeeded */
744 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
749 release an ip address
751 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
752 struct ctdb_req_control *c,
757 struct takeover_callback_state *state;
758 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
759 struct ctdb_vnn *vnn;
761 /* update our vnn list */
762 vnn = find_public_ip_vnn(ctdb, &pip->addr);
764 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
765 ctdb_addr_to_str(&pip->addr)));
770 /* stop any previous arps */
771 talloc_free(vnn->takeover_ctx);
772 vnn->takeover_ctx = NULL;
774 if (!ctdb_sys_have_ip(&pip->addr)) {
775 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
776 ctdb_addr_to_str(&pip->addr),
777 vnn->public_netmask_bits,
778 ctdb_vnn_iface_string(vnn)));
779 ctdb_vnn_unassign_iface(ctdb, vnn);
783 if (vnn->iface == NULL) {
784 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
785 "but we have no interface assigned, has someone manually configured it?"
787 ctdb_addr_to_str(&vnn->public_address)));
792 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
793 ctdb_addr_to_str(&pip->addr),
794 vnn->public_netmask_bits,
795 ctdb_vnn_iface_string(vnn),
798 state = talloc(ctdb, struct takeover_callback_state);
799 CTDB_NO_MEMORY(ctdb, state);
801 state->c = talloc_steal(state, c);
802 state->addr = talloc(state, ctdb_sock_addr);
803 CTDB_NO_MEMORY(ctdb, state->addr);
804 *state->addr = pip->addr;
807 ret = ctdb_event_script_callback(ctdb,
808 state, release_ip_callback, state,
810 CTDB_EVENT_RELEASE_IP,
812 ctdb_vnn_iface_string(vnn),
813 ctdb_addr_to_str(&pip->addr),
814 vnn->public_netmask_bits);
816 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
817 ctdb_addr_to_str(&pip->addr),
818 ctdb_vnn_iface_string(vnn)));
823 /* tell the control that we will be replying asynchronously */
829 release an ip address old v4 style
831 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
832 struct ctdb_req_control *c,
838 data.dsize = sizeof(struct ctdb_public_ip);
839 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
840 CTDB_NO_MEMORY(ctdb, data.dptr);
842 memcpy(data.dptr, indata.dptr, indata.dsize);
843 return ctdb_control_release_ip(ctdb, c, data, async_reply);
847 static int ctdb_add_public_address(struct ctdb_context *ctdb,
848 ctdb_sock_addr *addr,
849 unsigned mask, const char *ifaces)
851 struct ctdb_vnn *vnn;
858 /* Verify that we don't have an entry for this ip yet */
859 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
860 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
861 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
862 ctdb_addr_to_str(addr)));
867 /* create a new vnn structure for this ip address */
868 vnn = talloc_zero(ctdb, struct ctdb_vnn);
869 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
870 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
871 tmp = talloc_strdup(vnn, ifaces);
872 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
873 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
874 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
875 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
876 vnn->ifaces[num] = talloc_strdup(vnn, iface);
877 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
881 vnn->ifaces[num] = NULL;
882 vnn->public_address = *addr;
883 vnn->public_netmask_bits = mask;
886 for (i=0; vnn->ifaces[i]; i++) {
887 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
889 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
890 "for public_address[%s]\n",
891 vnn->ifaces[i], ctdb_addr_to_str(addr)));
897 DLIST_ADD(ctdb->vnn, vnn);
903 setup the event script directory
905 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
907 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
908 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
913 setup the public address lists from a file
915 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
921 lines = file_lines_load(alist, &nlines, ctdb);
923 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
926 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
930 for (i=0;i<nlines;i++) {
938 while ((*line == ' ') || (*line == '\t')) {
944 if (strcmp(line, "") == 0) {
947 tok = strtok(line, " \t");
949 tok = strtok(NULL, " \t");
951 if (NULL == ctdb->default_public_interface) {
952 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
957 ifaces = ctdb->default_public_interface;
962 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
963 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
967 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
968 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
978 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
982 struct ctdb_vnn *svnn;
986 svnn = talloc_zero(ctdb, struct ctdb_vnn);
987 CTDB_NO_MEMORY(ctdb, svnn);
989 svnn->ifaces = talloc_array(svnn, const char *, 2);
990 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
991 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
992 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
993 svnn->ifaces[1] = NULL;
995 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1001 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1003 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1004 "for single_ip[%s]\n",
1006 ctdb_addr_to_str(&svnn->public_address)));
1011 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1017 ctdb->single_ip_vnn = svnn;
1021 struct ctdb_public_ip_list {
1022 struct ctdb_public_ip_list *next;
1024 ctdb_sock_addr addr;
1028 /* Given a physical node, return the number of
1029 public addresses that are currently assigned to this node.
1031 static int node_ip_coverage(struct ctdb_context *ctdb,
1033 struct ctdb_public_ip_list *ips)
1037 for (;ips;ips=ips->next) {
1038 if (ips->pnn == pnn) {
1046 /* Check if this is a public ip known to the node, i.e. can that
1047 node take over this ip?
1049 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1050 struct ctdb_public_ip_list *ip)
1052 struct ctdb_all_public_ips *public_ips;
1055 public_ips = ctdb->nodes[pnn]->available_public_ips;
1057 if (public_ips == NULL) {
1061 for (i=0;i<public_ips->num;i++) {
1062 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1063 /* yes, this node can serve this public ip */
1072 /* search the node list for a node to take over this ip.
1073 pick the node that is currently serving the fewest ips
1074 so that the ips get spread out evenly.
1076 static int find_takeover_node(struct ctdb_context *ctdb,
1077 struct ctdb_node_map *nodemap, uint32_t mask,
1078 struct ctdb_public_ip_list *ip,
1079 struct ctdb_public_ip_list *all_ips)
1081 int pnn, min=0, num;
1085 for (i=0;i<nodemap->num;i++) {
1086 if (nodemap->nodes[i].flags & mask) {
1087 /* This node is not healthy and can not be used to serve
1093 /* verify that this node can serve this ip */
1094 if (can_node_serve_ip(ctdb, i, ip)) {
1095 /* no it couldn't so skip to the next node */
1099 num = node_ip_coverage(ctdb, i, all_ips);
1100 /* was this the first node we checked ? */
1112 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1113 ctdb_addr_to_str(&ip->addr)));
1123 static uint32_t *ip_key(ctdb_sock_addr *ip)
1125 static uint32_t key[IP_KEYLEN];
1127 bzero(key, sizeof(key));
1129 switch (ip->sa.sa_family) {
1131 key[3] = htonl(ip->ip.sin_addr.s_addr);
1134 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1135 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1136 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1137 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1140 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1147 static void *add_ip_callback(void *parm, void *data)
1152 void getips_count_callback(void *param, void *data)
1154 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1155 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1157 new_ip->next = *ip_list;
1161 static struct ctdb_public_ip_list *
1162 create_merged_ip_list(struct ctdb_context *ctdb)
1165 struct ctdb_public_ip_list *ip_list;
1166 struct ctdb_all_public_ips *public_ips;
1168 if (ctdb->ip_tree != NULL) {
1169 talloc_free(ctdb->ip_tree);
1170 ctdb->ip_tree = NULL;
1172 ctdb->ip_tree = trbt_create(ctdb, 0);
1174 for (i=0;i<ctdb->num_nodes;i++) {
1175 public_ips = ctdb->nodes[i]->known_public_ips;
1177 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1181 /* there were no public ips for this node */
1182 if (public_ips == NULL) {
1186 for (j=0;j<public_ips->num;j++) {
1187 struct ctdb_public_ip_list *tmp_ip;
1189 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1190 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1191 tmp_ip->pnn = public_ips->ips[j].pnn;
1192 tmp_ip->addr = public_ips->ips[j].addr;
1193 tmp_ip->next = NULL;
1195 trbt_insertarray32_callback(ctdb->ip_tree,
1196 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1203 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1209 make any IP alias changes for public addresses that are necessary
1211 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1213 int i, num_healthy, retries;
1214 struct ctdb_public_ip ip;
1215 struct ctdb_public_ipv4 ipv4;
1217 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1218 int maxnode, maxnum=0, minnode, minnum=0, num;
1220 struct timeval timeout;
1221 struct client_async_data *async_data;
1222 struct ctdb_client_control_state *state;
1223 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1228 /* Count how many completely healthy nodes we have */
1230 for (i=0;i<nodemap->num;i++) {
1231 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1236 if (num_healthy > 0) {
1237 /* We have healthy nodes, so only consider them for
1238 serving public addresses
1240 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1242 /* We didn't have any completely healthy nodes so
1243 use "disabled" nodes as a fallback
1245 mask = NODE_FLAGS_INACTIVE;
1248 /* since nodes only know about those public addresses that
1249 can be served by that particular node, no single node has
1250 a full list of all public addresses that exist in the cluster.
1251 Walk over all node structures and create a merged list of
1252 all public addresses that exist in the cluster.
1254 keep the tree of ips around as ctdb->ip_tree
1256 all_ips = create_merged_ip_list(ctdb);
1258 /* If we want deterministic ip allocations, i.e. that the ip addresses
1259 will always be allocated the same way for a specific set of
1260 available/unavailable nodes.
1262 if (1 == ctdb->tunable.deterministic_public_ips) {
1263 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1264 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1265 tmp_ip->pnn = i%nodemap->num;
1270 /* mark all public addresses with a masked node as being served by
1273 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1274 if (tmp_ip->pnn == -1) {
1277 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1282 /* verify that the assigned nodes can serve that public ip
1283 and set it to -1 if not
1285 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1286 if (tmp_ip->pnn == -1) {
1289 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1290 /* this node can not serve this ip. */
1296 /* now we must redistribute all public addresses with takeover node
1297 -1 among the nodes available
1301 /* loop over all ip's and find a physical node to cover for
1304 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1305 if (tmp_ip->pnn == -1) {
1306 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1307 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1308 ctdb_addr_to_str(&tmp_ip->addr)));
1313 /* If we don't want ips to fail back after a node becomes healthy
1314 again, we won't even try to reallocate the ip addresses so that
1315 they are evenly spread out.
1316 This can NOT be used at the same time as DeterministicIPs !
1318 if (1 == ctdb->tunable.no_ip_failback) {
1319 if (1 == ctdb->tunable.deterministic_public_ips) {
1320 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1326 /* now, try to make sure the ip addresses are evenly distributed
1328 for each ip address, loop over all nodes that can serve this
1329 ip and make sure that the difference between the node
1330 serving the most and the node serving the fewest ips is not greater
1333 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1334 if (tmp_ip->pnn == -1) {
1338 /* Get the highest and lowest number of ips served by any
1339 valid node which can serve this ip.
1343 for (i=0;i<nodemap->num;i++) {
1344 if (nodemap->nodes[i].flags & mask) {
1348 /* only check nodes that can actually serve this ip */
1349 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1350 /* no it couldn't so skip to the next node */
1354 num = node_ip_coverage(ctdb, i, all_ips);
1355 if (maxnode == -1) {
1364 if (minnode == -1) {
1374 if (maxnode == -1) {
1375 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1376 ctdb_addr_to_str(&tmp_ip->addr)));
1381 /* If we want deterministic IPs then don't try to reallocate
1382 them to spread out the load.
1384 if (1 == ctdb->tunable.deterministic_public_ips) {
1388 /* if the spread between the smallest and largest coverage by
1389 a node is >=2 we steal one of the ips from the node with
1390 most coverage to even things out a bit.
1391 try to do this at most 5 times since we don't want to spend
1392 too much time balancing the ip coverage.
1394 if ( (maxnum > minnum+1)
1396 struct ctdb_public_ip_list *tmp;
1398 /* mark one of maxnode's vnn's as unassigned and try
1401 for (tmp=all_ips;tmp;tmp=tmp->next) {
1402 if (tmp->pnn == maxnode) {
1412 /* finished distributing the public addresses, now just send the
1413 info out to the nodes
1417 /* at this point ->pnn is the node which will own each IP
1418 or -1 if there is no node that can cover this ip
1421 /* now tell all nodes to delete any alias that they should not
1422 have. This will be a NOOP on nodes that don't currently
1423 hold the given alias */
1424 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1425 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1427 for (i=0;i<nodemap->num;i++) {
1428 /* don't talk to unconnected nodes, but do talk to banned nodes */
1429 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1433 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1434 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1435 /* This node should be serving this
1436 vnn so don't tell it to release the ip
1440 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1441 ipv4.pnn = tmp_ip->pnn;
1442 ipv4.sin = tmp_ip->addr.ip;
1444 timeout = TAKEOVER_TIMEOUT();
1445 data.dsize = sizeof(ipv4);
1446 data.dptr = (uint8_t *)&ipv4;
1447 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1448 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1452 ip.pnn = tmp_ip->pnn;
1453 ip.addr = tmp_ip->addr;
1455 timeout = TAKEOVER_TIMEOUT();
1456 data.dsize = sizeof(ip);
1457 data.dptr = (uint8_t *)&ip;
1458 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1459 0, CTDB_CONTROL_RELEASE_IP, 0,
1464 if (state == NULL) {
1465 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1466 talloc_free(tmp_ctx);
1470 ctdb_client_async_add(async_data, state);
1473 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1474 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1475 talloc_free(tmp_ctx);
1478 talloc_free(async_data);
1481 /* tell all nodes to get their own IPs */
1482 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1483 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1484 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1485 if (tmp_ip->pnn == -1) {
1486 /* this IP won't be taken over */
1490 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1491 ipv4.pnn = tmp_ip->pnn;
1492 ipv4.sin = tmp_ip->addr.ip;
1494 timeout = TAKEOVER_TIMEOUT();
1495 data.dsize = sizeof(ipv4);
1496 data.dptr = (uint8_t *)&ipv4;
1497 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1498 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1502 ip.pnn = tmp_ip->pnn;
1503 ip.addr = tmp_ip->addr;
1505 timeout = TAKEOVER_TIMEOUT();
1506 data.dsize = sizeof(ip);
1507 data.dptr = (uint8_t *)&ip;
1508 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1509 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1513 if (state == NULL) {
1514 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1515 talloc_free(tmp_ctx);
1519 ctdb_client_async_add(async_data, state);
1521 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1522 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1523 talloc_free(tmp_ctx);
1527 talloc_free(tmp_ctx);
1533 destroy a ctdb_client_ip structure
1535 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1537 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1538 ctdb_addr_to_str(&ip->addr),
1539 ntohs(ip->addr.ip.sin_port),
1542 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1547 called by a client to inform us of a TCP connection that it is managing
1548 that should be tickled with an ACK when IP takeover is done
1549 we handle both the old ipv4 style of packets as well as the new ipv4/6
1552 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1555 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1556 struct ctdb_control_tcp *old_addr = NULL;
1557 struct ctdb_control_tcp_addr new_addr;
1558 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1559 struct ctdb_tcp_list *tcp;
1560 struct ctdb_control_tcp_vnn t;
1563 struct ctdb_client_ip *ip;
1564 struct ctdb_vnn *vnn;
1565 ctdb_sock_addr addr;
1567 switch (indata.dsize) {
1568 case sizeof(struct ctdb_control_tcp):
1569 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1570 ZERO_STRUCT(new_addr);
1571 tcp_sock = &new_addr;
1572 tcp_sock->src.ip = old_addr->src;
1573 tcp_sock->dest.ip = old_addr->dest;
1575 case sizeof(struct ctdb_control_tcp_addr):
1576 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1579 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1580 "to ctdb_control_tcp_client. size was %d but "
1581 "only allowed sizes are %lu and %lu\n",
1583 (long unsigned)sizeof(struct ctdb_control_tcp),
1584 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1588 addr = tcp_sock->src;
1589 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1590 addr = tcp_sock->dest;
1591 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1594 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1595 vnn = find_public_ip_vnn(ctdb, &addr);
1597 switch (addr.sa.sa_family) {
1599 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1600 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1601 ctdb_addr_to_str(&addr)));
1605 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1606 ctdb_addr_to_str(&addr)));
1609 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1615 if (vnn->pnn != ctdb->pnn) {
1616 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1617 ctdb_addr_to_str(&addr),
1618 client_id, client->pid));
1619 /* failing this call will tell smbd to die */
1623 ip = talloc(client, struct ctdb_client_ip);
1624 CTDB_NO_MEMORY(ctdb, ip);
1628 ip->client_id = client_id;
1629 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1630 DLIST_ADD(ctdb->client_ip_list, ip);
1632 tcp = talloc(client, struct ctdb_tcp_list);
1633 CTDB_NO_MEMORY(ctdb, tcp);
1635 tcp->connection.src_addr = tcp_sock->src;
1636 tcp->connection.dst_addr = tcp_sock->dest;
1638 DLIST_ADD(client->tcp_list, tcp);
1640 t.src = tcp_sock->src;
1641 t.dest = tcp_sock->dest;
1643 data.dptr = (uint8_t *)&t;
1644 data.dsize = sizeof(t);
1646 switch (addr.sa.sa_family) {
1648 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1649 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1650 ctdb_addr_to_str(&tcp_sock->src),
1651 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1654 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1655 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1656 ctdb_addr_to_str(&tcp_sock->src),
1657 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1660 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1664 /* tell all nodes about this tcp connection */
1665 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1666 CTDB_CONTROL_TCP_ADD,
1667 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1669 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1677 find a tcp address on a list
1679 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1680 struct ctdb_tcp_connection *tcp)
1684 if (array == NULL) {
1688 for (i=0;i<array->num;i++) {
1689 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1690 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1691 return &array->connections[i];
1698 called by a daemon to inform us of a TCP connection that one of its
1699 clients is managing that should be tickled with an ACK when IP takeover is
1702 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1704 struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1705 struct ctdb_tcp_array *tcparray;
1706 struct ctdb_tcp_connection tcp;
1707 struct ctdb_vnn *vnn;
1709 vnn = find_public_ip_vnn(ctdb, &p->dest);
1711 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1712 ctdb_addr_to_str(&p->dest)));
1718 tcparray = vnn->tcp_array;
1720 /* If this is the first tickle */
1721 if (tcparray == NULL) {
1722 tcparray = talloc_size(ctdb->nodes,
1723 offsetof(struct ctdb_tcp_array, connections) +
1724 sizeof(struct ctdb_tcp_connection) * 1);
1725 CTDB_NO_MEMORY(ctdb, tcparray);
1726 vnn->tcp_array = tcparray;
1729 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1730 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1732 tcparray->connections[tcparray->num].src_addr = p->src;
1733 tcparray->connections[tcparray->num].dst_addr = p->dest;
1739 /* Do we already have this tickle ?*/
1740 tcp.src_addr = p->src;
1741 tcp.dst_addr = p->dest;
1742 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1743 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1744 ctdb_addr_to_str(&tcp.dst_addr),
1745 ntohs(tcp.dst_addr.ip.sin_port),
1750 /* A new tickle, we must add it to the array */
1751 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1752 struct ctdb_tcp_connection,
1754 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1756 vnn->tcp_array = tcparray;
1757 tcparray->connections[tcparray->num].src_addr = p->src;
1758 tcparray->connections[tcparray->num].dst_addr = p->dest;
1761 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1762 ctdb_addr_to_str(&tcp.dst_addr),
1763 ntohs(tcp.dst_addr.ip.sin_port),
1771 called by a daemon to inform us of a TCP connection that one of its
1772 clients is managing that should be tickled with an ACK when IP takeover is
1775 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1777 struct ctdb_tcp_connection *tcpp;
1778 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1781 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1782 ctdb_addr_to_str(&conn->dst_addr)));
1786 /* if the array is empty we cant remove it
1787 and we dont need to do anything
1789 if (vnn->tcp_array == NULL) {
1790 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1791 ctdb_addr_to_str(&conn->dst_addr),
1792 ntohs(conn->dst_addr.ip.sin_port)));
1797 /* See if we know this connection
1798 if we dont know this connection then we dont need to do anything
1800 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1802 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1803 ctdb_addr_to_str(&conn->dst_addr),
1804 ntohs(conn->dst_addr.ip.sin_port)));
1809 /* We need to remove this entry from the array.
1810 Instead of allocating a new array and copying data to it
1811 we cheat and just copy the last entry in the existing array
1812 to the entry that is to be removed and just shrink the
1815 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1816 vnn->tcp_array->num--;
1818 /* If we deleted the last entry we also need to remove the entire array
1820 if (vnn->tcp_array->num == 0) {
1821 talloc_free(vnn->tcp_array);
1822 vnn->tcp_array = NULL;
1825 vnn->tcp_update_needed = true;
1827 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1828 ctdb_addr_to_str(&conn->src_addr),
1829 ntohs(conn->src_addr.ip.sin_port)));
1834 called when a daemon restarts - send all tickles for all public addresses
1835 we are serving immediately to the new node.
1837 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1839 /*XXX here we should send all tickles we are serving to the new node */
1845 called when a client structure goes away - hook to remove
1846 elements from the tcp_list in all daemons
1848 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1850 while (client->tcp_list) {
1851 struct ctdb_tcp_list *tcp = client->tcp_list;
1852 DLIST_REMOVE(client->tcp_list, tcp);
1853 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1859 release all IPs on shutdown
1861 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1863 struct ctdb_vnn *vnn;
1865 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1866 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1867 ctdb_vnn_unassign_iface(ctdb, vnn);
1873 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1874 ctdb_vnn_iface_string(vnn),
1875 ctdb_addr_to_str(&vnn->public_address),
1876 vnn->public_netmask_bits);
1877 release_kill_clients(ctdb, &vnn->public_address);
1878 ctdb_vnn_unassign_iface(ctdb, vnn);
1884 get list of public IPs
1886 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1887 struct ctdb_req_control *c, TDB_DATA *outdata)
1890 struct ctdb_all_public_ips *ips;
1891 struct ctdb_vnn *vnn;
1892 bool only_available = false;
1894 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1895 only_available = true;
1898 /* count how many public ip structures we have */
1900 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1904 len = offsetof(struct ctdb_all_public_ips, ips) +
1905 num*sizeof(struct ctdb_public_ip);
1906 ips = talloc_zero_size(outdata, len);
1907 CTDB_NO_MEMORY(ctdb, ips);
1910 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1911 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1914 ips->ips[i].pnn = vnn->pnn;
1915 ips->ips[i].addr = vnn->public_address;
1919 len = offsetof(struct ctdb_all_public_ips, ips) +
1920 i*sizeof(struct ctdb_public_ip);
1922 outdata->dsize = len;
1923 outdata->dptr = (uint8_t *)ips;
1930 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1932 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1933 struct ctdb_req_control *c, TDB_DATA *outdata)
1936 struct ctdb_all_public_ipsv4 *ips;
1937 struct ctdb_vnn *vnn;
1939 /* count how many public ip structures we have */
1941 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1942 if (vnn->public_address.sa.sa_family != AF_INET) {
1948 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1949 num*sizeof(struct ctdb_public_ipv4);
1950 ips = talloc_zero_size(outdata, len);
1951 CTDB_NO_MEMORY(ctdb, ips);
1953 outdata->dsize = len;
1954 outdata->dptr = (uint8_t *)ips;
1958 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1959 if (vnn->public_address.sa.sa_family != AF_INET) {
1962 ips->ips[i].pnn = vnn->pnn;
1963 ips->ips[i].sin = vnn->public_address.ip;
1970 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1971 struct ctdb_req_control *c,
1976 ctdb_sock_addr *addr;
1977 struct ctdb_control_public_ip_info *info;
1978 struct ctdb_vnn *vnn;
1980 addr = (ctdb_sock_addr *)indata.dptr;
1982 vnn = find_public_ip_vnn(ctdb, addr);
1984 /* if it is not a public ip it could be our 'single ip' */
1985 if (ctdb->single_ip_vnn) {
1986 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1987 vnn = ctdb->single_ip_vnn;
1992 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1993 "'%s'not a public address\n",
1994 ctdb_addr_to_str(addr)));
1998 /* count how many interfaces this vnn has */
2000 for (;vnn->ifaces[num];) {
2004 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2005 num*sizeof(struct ctdb_control_iface_info);
2006 info = talloc_zero_size(outdata, len);
2007 CTDB_NO_MEMORY(ctdb, info);
2009 info->ip.addr = vnn->public_address;
2010 info->ip.pnn = vnn->pnn;
2011 info->active_idx = 0xFFFFFFFF;
2013 for (i=0; vnn->ifaces[i]; i++) {
2014 struct ctdb_iface *cur;
2016 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2018 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2022 if (vnn->iface == cur) {
2023 info->active_idx = i;
2025 strcpy(info->ifaces[i].name, cur->name);
2026 info->ifaces[i].link_state = cur->link_up;
2027 info->ifaces[i].references = cur->references;
2030 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2031 i*sizeof(struct ctdb_control_iface_info);
2033 outdata->dsize = len;
2034 outdata->dptr = (uint8_t *)info;
2039 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2040 struct ctdb_req_control *c,
2044 struct ctdb_control_get_ifaces *ifaces;
2045 struct ctdb_iface *cur;
2047 /* count how many interfaces we have */
2049 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2053 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2054 num*sizeof(struct ctdb_control_iface_info);
2055 ifaces = talloc_zero_size(outdata, len);
2056 CTDB_NO_MEMORY(ctdb, ifaces);
2059 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2060 strcpy(ifaces->ifaces[i].name, cur->name);
2061 ifaces->ifaces[i].link_state = cur->link_up;
2062 ifaces->ifaces[i].references = cur->references;
2066 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2067 i*sizeof(struct ctdb_control_iface_info);
2069 outdata->dsize = len;
2070 outdata->dptr = (uint8_t *)ifaces;
2075 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2076 struct ctdb_req_control *c,
2079 struct ctdb_control_iface_info *info;
2080 struct ctdb_iface *iface;
2081 bool link_up = false;
2083 info = (struct ctdb_control_iface_info *)indata.dptr;
2085 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2086 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2087 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2088 len, len, info->name));
2092 switch (info->link_state) {
2100 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2101 (unsigned int)info->link_state));
2105 if (info->references != 0) {
2106 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2107 (unsigned int)info->references));
2111 iface = ctdb_find_iface(ctdb, info->name);
2112 if (iface == NULL) {
2113 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2118 if (link_up == iface->link_up) {
2122 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2123 ("iface[%s] has changed it's link status %s => %s\n",
2125 iface->link_up?"up":"down",
2126 link_up?"up":"down"));
2128 iface->link_up = link_up;
2134 structure containing the listening socket and the list of tcp connections
2135 that the ctdb daemon is to kill
2137 struct ctdb_kill_tcp {
2138 struct ctdb_vnn *vnn;
2139 struct ctdb_context *ctdb;
2141 struct fd_event *fde;
2142 trbt_tree_t *connections;
2147 a tcp connection that is to be killed
2149 struct ctdb_killtcp_con {
2150 ctdb_sock_addr src_addr;
2151 ctdb_sock_addr dst_addr;
2153 struct ctdb_kill_tcp *killtcp;
2156 /* this function is used to create a key to represent this socketpair
2157 in the killtcp tree.
2158 this key is used to insert and lookup matching socketpairs that are
2159 to be tickled and RST
2161 #define KILLTCP_KEYLEN 10
2162 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2164 static uint32_t key[KILLTCP_KEYLEN];
2166 bzero(key, sizeof(key));
2168 if (src->sa.sa_family != dst->sa.sa_family) {
2169 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2173 switch (src->sa.sa_family) {
2175 key[0] = dst->ip.sin_addr.s_addr;
2176 key[1] = src->ip.sin_addr.s_addr;
2177 key[2] = dst->ip.sin_port;
2178 key[3] = src->ip.sin_port;
2181 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2182 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2183 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2184 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2185 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2186 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2187 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2188 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2189 key[8] = dst->ip6.sin6_port;
2190 key[9] = src->ip6.sin6_port;
2193 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2201 called when we get a read event on the raw socket
2203 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2204 uint16_t flags, void *private_data)
2206 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2207 struct ctdb_killtcp_con *con;
2208 ctdb_sock_addr src, dst;
2209 uint32_t ack_seq, seq;
2211 if (!(flags & EVENT_FD_READ)) {
2215 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2216 killtcp->private_data,
2218 &ack_seq, &seq) != 0) {
2219 /* probably a non-tcp ACK packet */
2223 /* check if we have this guy in our list of connections
2226 con = trbt_lookuparray32(killtcp->connections,
2227 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2229 /* no this was some other packet we can just ignore */
2233 /* This one has been tickled !
2234 now reset him and remove him from the list.
2236 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2237 ntohs(con->dst_addr.ip.sin_port),
2238 ctdb_addr_to_str(&con->src_addr),
2239 ntohs(con->src_addr.ip.sin_port)));
2241 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2246 /* when traversing the list of all tcp connections to send tickle acks to
2247 (so that we can capture the ack coming back and kill the connection
2249 this callback is called for each connection we are currently trying to kill
2251 static void tickle_connection_traverse(void *param, void *data)
2253 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2255 /* have tried too many times, just give up */
2256 if (con->count >= 5) {
2261 /* otherwise, try tickling it again */
2264 (ctdb_sock_addr *)&con->dst_addr,
2265 (ctdb_sock_addr *)&con->src_addr,
2271 called every second until all sentenced connections have been reset
2273 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2274 struct timeval t, void *private_data)
2276 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2279 /* loop over all connections sending tickle ACKs */
2280 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2283 /* If there are no more connections to kill we can remove the
2284 entire killtcp structure
2286 if ( (killtcp->connections == NULL) ||
2287 (killtcp->connections->root == NULL) ) {
2288 talloc_free(killtcp);
2292 /* try tickling them again in a second's time
2294 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2295 ctdb_tickle_sentenced_connections, killtcp);
2299 destroy the killtcp structure
2301 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2303 killtcp->vnn->killtcp = NULL;
2308 /* nothing fancy here, just unconditionally replace any existing
2309 connection structure with the new one.
2311 don't even free the old one if it did exist, that one is talloc_stolen
2312 by the same node in the tree anyway and will be deleted when the new data
2315 static void *add_killtcp_callback(void *parm, void *data)
2321 add a tcp socket to the list of connections we want to RST
2323 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2327 ctdb_sock_addr src, dst;
2328 struct ctdb_kill_tcp *killtcp;
2329 struct ctdb_killtcp_con *con;
2330 struct ctdb_vnn *vnn;
2332 ctdb_canonicalize_ip(s, &src);
2333 ctdb_canonicalize_ip(d, &dst);
2335 vnn = find_public_ip_vnn(ctdb, &dst);
2337 vnn = find_public_ip_vnn(ctdb, &src);
2340 /* if it is not a public ip it could be our 'single ip' */
2341 if (ctdb->single_ip_vnn) {
2342 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2343 vnn = ctdb->single_ip_vnn;
2348 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2352 killtcp = vnn->killtcp;
2354 /* If this is the first connection to kill we must allocate
2357 if (killtcp == NULL) {
2358 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2359 CTDB_NO_MEMORY(ctdb, killtcp);
2362 killtcp->ctdb = ctdb;
2363 killtcp->capture_fd = -1;
2364 killtcp->connections = trbt_create(killtcp, 0);
2366 vnn->killtcp = killtcp;
2367 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2372 /* create a structure that describes this connection we want to
2373 RST and store it in killtcp->connections
2375 con = talloc(killtcp, struct ctdb_killtcp_con);
2376 CTDB_NO_MEMORY(ctdb, con);
2377 con->src_addr = src;
2378 con->dst_addr = dst;
2380 con->killtcp = killtcp;
2383 trbt_insertarray32_callback(killtcp->connections,
2384 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2385 add_killtcp_callback, con);
2388 If we dont have a socket to listen on yet we must create it
2390 if (killtcp->capture_fd == -1) {
2391 const char *iface = ctdb_vnn_iface_string(vnn);
2392 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2393 if (killtcp->capture_fd == -1) {
2394 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2395 "socket on iface '%s' for killtcp (%s)\n",
2396 iface, strerror(errno)));
2402 if (killtcp->fde == NULL) {
2403 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2404 EVENT_FD_READ | EVENT_FD_AUTOCLOSE,
2405 capture_tcp_handler, killtcp);
2407 /* We also need to set up some events to tickle all these connections
2408 until they are all reset
2410 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2411 ctdb_tickle_sentenced_connections, killtcp);
2414 /* tickle him once now */
2423 talloc_free(vnn->killtcp);
2424 vnn->killtcp = NULL;
2429 kill a TCP connection.
2431 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2433 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2435 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2439 called by a daemon to inform us of the entire list of TCP tickles for
2440 a particular public address.
2441 this control should only be sent by the node that is currently serving
2442 that public address.
2444 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2446 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2447 struct ctdb_tcp_array *tcparray;
2448 struct ctdb_vnn *vnn;
2450 /* We must at least have tickles.num or else we can't verify the size
2451 of the received data blob
2453 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2454 tickles.connections)) {
2455 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2459 /* verify that the size of data matches what we expect */
2460 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2461 tickles.connections)
2462 + sizeof(struct ctdb_tcp_connection)
2463 * list->tickles.num) {
2464 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2468 vnn = find_public_ip_vnn(ctdb, &list->addr);
2470 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2471 ctdb_addr_to_str(&list->addr)));
2476 /* remove any old ticklelist we might have */
2477 talloc_free(vnn->tcp_array);
2478 vnn->tcp_array = NULL;
2480 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2481 CTDB_NO_MEMORY(ctdb, tcparray);
2483 tcparray->num = list->tickles.num;
2485 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2486 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2488 memcpy(tcparray->connections, &list->tickles.connections[0],
2489 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2491 /* We now have a new fresh tickle list array for this vnn */
2492 vnn->tcp_array = talloc_steal(vnn, tcparray);
2498 called to return the full list of tickles for the public address associated
2499 with the provided vnn
2501 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2503 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2504 struct ctdb_control_tcp_tickle_list *list;
2505 struct ctdb_tcp_array *tcparray;
2507 struct ctdb_vnn *vnn;
2509 vnn = find_public_ip_vnn(ctdb, addr);
2511 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2512 ctdb_addr_to_str(addr)));
2517 tcparray = vnn->tcp_array;
2519 num = tcparray->num;
2524 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2525 tickles.connections)
2526 + sizeof(struct ctdb_tcp_connection) * num;
2528 outdata->dptr = talloc_size(outdata, outdata->dsize);
2529 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2530 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2533 list->tickles.num = num;
2535 memcpy(&list->tickles.connections[0], tcparray->connections,
2536 sizeof(struct ctdb_tcp_connection) * num);
2544 set the list of all tcp tickles for a public address
2546 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2547 struct timeval timeout, uint32_t destnode,
2548 ctdb_sock_addr *addr,
2549 struct ctdb_tcp_array *tcparray)
2553 struct ctdb_control_tcp_tickle_list *list;
2556 num = tcparray->num;
2561 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2562 tickles.connections) +
2563 sizeof(struct ctdb_tcp_connection) * num;
2564 data.dptr = talloc_size(ctdb, data.dsize);
2565 CTDB_NO_MEMORY(ctdb, data.dptr);
2567 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2569 list->tickles.num = num;
2571 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2574 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2575 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2576 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2578 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2582 talloc_free(data.dptr);
2589 perform tickle updates if required
2591 static void ctdb_update_tcp_tickles(struct event_context *ev,
2592 struct timed_event *te,
2593 struct timeval t, void *private_data)
2595 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2597 struct ctdb_vnn *vnn;
2599 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2600 /* we only send out updates for public addresses that
2603 if (ctdb->pnn != vnn->pnn) {
2606 /* We only send out the updates if we need to */
2607 if (!vnn->tcp_update_needed) {
2610 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2612 CTDB_BROADCAST_CONNECTED,
2613 &vnn->public_address,
2616 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2617 ctdb_addr_to_str(&vnn->public_address)));
2621 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2622 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2623 ctdb_update_tcp_tickles, ctdb);
2628 start periodic update of tcp tickles
2630 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2632 ctdb->tickle_update_context = talloc_new(ctdb);
2634 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2635 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2636 ctdb_update_tcp_tickles, ctdb);
2642 struct control_gratious_arp {
2643 struct ctdb_context *ctdb;
2644 ctdb_sock_addr addr;
2650 send a control_gratuitous arp
2652 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2653 struct timeval t, void *private_data)
2656 struct control_gratious_arp *arp = talloc_get_type(private_data,
2657 struct control_gratious_arp);
2659 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2661 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2662 arp->iface, strerror(errno)));
2667 if (arp->count == CTDB_ARP_REPEAT) {
2672 event_add_timed(arp->ctdb->ev, arp,
2673 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2674 send_gratious_arp, arp);
2681 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2683 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2684 struct control_gratious_arp *arp;
2686 /* verify the size of indata */
2687 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2688 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2689 (unsigned)indata.dsize,
2690 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2694 ( offsetof(struct ctdb_control_gratious_arp, iface)
2695 + gratious_arp->len ) ){
2697 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2698 "but should be %u bytes\n",
2699 (unsigned)indata.dsize,
2700 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2705 arp = talloc(ctdb, struct control_gratious_arp);
2706 CTDB_NO_MEMORY(ctdb, arp);
2709 arp->addr = gratious_arp->addr;
2710 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2711 CTDB_NO_MEMORY(ctdb, arp->iface);
2714 event_add_timed(arp->ctdb->ev, arp,
2715 timeval_zero(), send_gratious_arp, arp);
2720 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2722 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2725 /* verify the size of indata */
2726 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2727 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2731 ( offsetof(struct ctdb_control_ip_iface, iface)
2734 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2735 "but should be %u bytes\n",
2736 (unsigned)indata.dsize,
2737 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2741 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2744 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2752 called when releaseip event finishes for del_public_address
2754 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2757 talloc_free(private_data);
2760 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2762 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2763 struct ctdb_vnn *vnn;
2766 /* verify the size of indata */
2767 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2768 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2772 ( offsetof(struct ctdb_control_ip_iface, iface)
2775 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2776 "but should be %u bytes\n",
2777 (unsigned)indata.dsize,
2778 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2782 /* walk over all public addresses until we find a match */
2783 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2784 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2785 TALLOC_CTX *mem_ctx;
2787 DLIST_REMOVE(ctdb->vnn, vnn);
2788 if (vnn->iface == NULL) {
2793 mem_ctx = talloc_new(ctdb);
2794 ret = ctdb_event_script_callback(ctdb,
2795 mem_ctx, delete_ip_callback, mem_ctx,
2797 CTDB_EVENT_RELEASE_IP,
2799 ctdb_vnn_iface_string(vnn),
2800 ctdb_addr_to_str(&vnn->public_address),
2801 vnn->public_netmask_bits);
2802 ctdb_vnn_unassign_iface(ctdb, vnn);
2814 /* This function is called from the recovery daemon to verify that a remote
2815 node has the expected ip allocation.
2816 This is verified against ctdb->ip_tree
2818 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2820 struct ctdb_public_ip_list *tmp_ip;
2823 if (ctdb->ip_tree == NULL) {
2824 /* dont know the expected allocation yet, assume remote node
2833 for (i=0; i<ips->num; i++) {
2834 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2835 if (tmp_ip == NULL) {
2836 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2840 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2844 if (tmp_ip->pnn != ips->ips[i].pnn) {
2845 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2853 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2855 struct ctdb_public_ip_list *tmp_ip;
2857 if (ctdb->ip_tree == NULL) {
2858 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2862 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2863 if (tmp_ip == NULL) {
2864 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2868 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2869 tmp_ip->pnn = ip->pnn;