4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout value used for the takeover/release controls sent in this file,
   built by timeval_current_ofs() from the takeover_timeout tunable.
   The expansion reads `ctdb`, so a variable of that name must be in scope
   wherever the macro is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument of the timeval_current_ofs() call that schedules the next
   ctdb_control_send_arp run (presumably seconds -- confirm against
   timeval_current_ofs). */
33 #define CTDB_ARP_INTERVAL 1
/* Value that ctdb_control_send_arp compares arp->count against. */
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Look for an existing entry with this interface name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
336 if (status == -ETIME) {
339 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340 ctdb_addr_to_str(&state->vnn->public_address),
341 ctdb_vnn_iface_string(state->vnn)));
342 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
347 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
354 /* the control succeeded */
355 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
361 take over an ip address
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364 struct ctdb_req_control *c,
365 struct ctdb_vnn *vnn)
368 struct ctdb_do_takeip_state *state;
370 ret = ctdb_vnn_assign_iface(ctdb, vnn);
372 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373 "assin a usable interface\n",
374 ctdb_addr_to_str(&vnn->public_address),
375 vnn->public_netmask_bits));
379 state = talloc(vnn, struct ctdb_do_takeip_state);
380 CTDB_NO_MEMORY(ctdb, state);
382 state->c = talloc_steal(ctdb, c);
385 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386 ctdb_addr_to_str(&vnn->public_address),
387 vnn->public_netmask_bits,
388 ctdb_vnn_iface_string(vnn)));
390 ret = ctdb_event_script_callback(ctdb,
392 ctdb_do_takeip_callback,
397 ctdb_vnn_iface_string(vnn),
398 ctdb_addr_to_str(&vnn->public_address),
399 vnn->public_netmask_bits);
402 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403 ctdb_addr_to_str(&vnn->public_address),
404 ctdb_vnn_iface_string(vnn)));
412 struct ctdb_do_updateip_state {
413 struct ctdb_req_control *c;
414 struct ctdb_iface *old;
415 struct ctdb_vnn *vnn;
419 called when updateip event finishes
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
424 struct ctdb_do_updateip_state *state =
425 talloc_get_type(private_data, struct ctdb_do_updateip_state);
429 if (status == -ETIME) {
432 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433 ctdb_addr_to_str(&state->vnn->public_address),
435 ctdb_vnn_iface_string(state->vnn)));
438 * All we can do is reset the old interface
439 * and let the next run fix it
441 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442 state->vnn->iface = state->old;
443 state->vnn->iface->references++;
445 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
450 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
452 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
457 /* the control succeeded */
458 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
464 update (move) an ip address
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467 struct ctdb_req_control *c,
468 struct ctdb_vnn *vnn)
471 struct ctdb_do_updateip_state *state;
472 struct ctdb_iface *old = vnn->iface;
474 ctdb_vnn_unassign_iface(ctdb, vnn);
475 ret = ctdb_vnn_assign_iface(ctdb, vnn);
477 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478 "assin a usable interface (old iface '%s')\n",
479 ctdb_addr_to_str(&vnn->public_address),
480 vnn->public_netmask_bits,
485 if (vnn->iface == old) {
486 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487 "assin a same interface '%s'\n",
488 ctdb_addr_to_str(&vnn->public_address),
489 vnn->public_netmask_bits,
494 state = talloc(vnn, struct ctdb_do_updateip_state);
495 CTDB_NO_MEMORY(ctdb, state);
497 state->c = talloc_steal(ctdb, c);
501 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502 "interface %s to %s\n",
503 ctdb_addr_to_str(&vnn->public_address),
504 vnn->public_netmask_bits,
506 ctdb_vnn_iface_string(vnn)));
508 ret = ctdb_event_script_callback(ctdb,
510 ctdb_do_updateip_callback,
513 CTDB_EVENT_UPDATE_IP,
516 ctdb_vnn_iface_string(vnn),
517 ctdb_addr_to_str(&vnn->public_address),
518 vnn->public_netmask_bits);
520 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521 ctdb_addr_to_str(&vnn->public_address),
522 old->name, ctdb_vnn_iface_string(vnn)));
531 Find the vnn entry whose public address matches the given ip address
532 returns NULL if the address is not known as a public address
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
536 struct ctdb_vnn *vnn;
538 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539 if (ctdb_same_ip(&vnn->public_address, addr)) {
548 take over an ip address
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551 struct ctdb_req_control *c,
556 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557 struct ctdb_vnn *vnn;
558 bool have_ip = false;
559 bool do_updateip = false;
560 bool do_takeip = false;
561 struct ctdb_iface *best_iface = NULL;
563 if (pip->pnn != ctdb->pnn) {
564 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565 "with pnn %d, but we're node %d\n",
566 ctdb_addr_to_str(&pip->addr),
567 pip->pnn, ctdb->pnn));
571 /* update our vnn list */
572 vnn = find_public_ip_vnn(ctdb, &pip->addr);
574 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575 ctdb_addr_to_str(&pip->addr)));
579 have_ip = ctdb_sys_have_ip(&pip->addr);
580 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581 if (best_iface == NULL) {
582 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583 "a usable interface (old %s, have_ip %d)\n",
584 ctdb_addr_to_str(&vnn->public_address),
585 vnn->public_netmask_bits,
586 ctdb_vnn_iface_string(vnn),
591 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
592 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
596 if (vnn->iface == NULL && have_ip) {
597 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
598 "but we have no interface assigned, has someone manually configured it?"
600 ctdb_addr_to_str(&vnn->public_address)));
605 if (vnn->pnn != ctdb->pnn && have_ip) {
606 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
607 "and we have it on iface[%s], but it was assigned to node %d"
608 "and we are node %d, banning ourself\n",
609 ctdb_addr_to_str(&vnn->public_address),
610 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
616 if (vnn->iface->link_up) {
617 /* only move when the rebalance gains something */
618 if (vnn->iface->references > (best_iface->references + 1)) {
621 } else if (vnn->iface != best_iface) {
628 ctdb_vnn_unassign_iface(ctdb, vnn);
635 ret = ctdb_do_takeip(ctdb, c, vnn);
639 } else if (do_updateip) {
640 ret = ctdb_do_updateip(ctdb, c, vnn);
646 * The interface is up and the kernel knows the ip
649 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
650 ctdb_addr_to_str(&pip->addr),
651 vnn->public_netmask_bits,
652 ctdb_vnn_iface_string(vnn)));
656 /* tell ctdb_control.c that we will be replying asynchronously */
663 takeover an ip address old v4 style
665 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
666 struct ctdb_req_control *c,
672 data.dsize = sizeof(struct ctdb_public_ip);
673 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
674 CTDB_NO_MEMORY(ctdb, data.dptr);
676 memcpy(data.dptr, indata.dptr, indata.dsize);
677 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
681 kill any clients that are registered with an IP that is being released
683 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
685 struct ctdb_client_ip *ip;
687 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
688 ctdb_addr_to_str(addr)));
690 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
691 ctdb_sock_addr tmp_addr;
694 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
696 ctdb_addr_to_str(&ip->addr)));
698 if (ctdb_same_ip(&tmp_addr, addr)) {
699 struct ctdb_client *client = ctdb_reqid_find(ctdb,
702 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
704 ctdb_addr_to_str(&ip->addr),
707 if (client->pid != 0) {
708 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
709 (unsigned)client->pid,
710 ctdb_addr_to_str(addr),
712 kill(client->pid, SIGKILL);
719 called when releaseip event finishes
721 static void release_ip_callback(struct ctdb_context *ctdb, int status,
724 struct takeover_callback_state *state =
725 talloc_get_type(private_data, struct takeover_callback_state);
728 if (status == -ETIME) {
732 /* send a message to all clients of this node telling them
733 that the cluster has been reconfigured and they should
734 release any sockets on this IP */
735 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
736 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
737 data.dsize = strlen((char *)data.dptr)+1;
739 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
741 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
743 /* kill clients that have registered with this IP */
744 release_kill_clients(ctdb, state->addr);
746 ctdb_vnn_unassign_iface(ctdb, state->vnn);
748 /* the control succeeded */
749 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
754 release an ip address
756 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
757 struct ctdb_req_control *c,
762 struct takeover_callback_state *state;
763 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
764 struct ctdb_vnn *vnn;
766 /* update our vnn list */
767 vnn = find_public_ip_vnn(ctdb, &pip->addr);
769 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
770 ctdb_addr_to_str(&pip->addr)));
775 /* stop any previous arps */
776 talloc_free(vnn->takeover_ctx);
777 vnn->takeover_ctx = NULL;
779 if (!ctdb_sys_have_ip(&pip->addr)) {
780 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
781 ctdb_addr_to_str(&pip->addr),
782 vnn->public_netmask_bits,
783 ctdb_vnn_iface_string(vnn)));
784 ctdb_vnn_unassign_iface(ctdb, vnn);
788 if (vnn->iface == NULL) {
789 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
790 "but we have no interface assigned, has someone manually configured it?"
792 ctdb_addr_to_str(&vnn->public_address)));
797 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
798 ctdb_addr_to_str(&pip->addr),
799 vnn->public_netmask_bits,
800 ctdb_vnn_iface_string(vnn),
803 state = talloc(ctdb, struct takeover_callback_state);
804 CTDB_NO_MEMORY(ctdb, state);
806 state->c = talloc_steal(state, c);
807 state->addr = talloc(state, ctdb_sock_addr);
808 CTDB_NO_MEMORY(ctdb, state->addr);
809 *state->addr = pip->addr;
812 ret = ctdb_event_script_callback(ctdb,
813 state, release_ip_callback, state,
815 CTDB_EVENT_RELEASE_IP,
817 ctdb_vnn_iface_string(vnn),
818 ctdb_addr_to_str(&pip->addr),
819 vnn->public_netmask_bits);
821 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
822 ctdb_addr_to_str(&pip->addr),
823 ctdb_vnn_iface_string(vnn)));
828 /* tell the control that we will be replying asynchronously */
834 release an ip address old v4 style
836 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
837 struct ctdb_req_control *c,
843 data.dsize = sizeof(struct ctdb_public_ip);
844 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
845 CTDB_NO_MEMORY(ctdb, data.dptr);
847 memcpy(data.dptr, indata.dptr, indata.dsize);
848 return ctdb_control_release_ip(ctdb, c, data, async_reply);
852 static int ctdb_add_public_address(struct ctdb_context *ctdb,
853 ctdb_sock_addr *addr,
854 unsigned mask, const char *ifaces)
856 struct ctdb_vnn *vnn;
863 /* Verify that we dont have an entry for this ip yet */
864 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
865 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
866 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
867 ctdb_addr_to_str(addr)));
872 /* create a new vnn structure for this ip address */
873 vnn = talloc_zero(ctdb, struct ctdb_vnn);
874 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
875 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
876 tmp = talloc_strdup(vnn, ifaces);
877 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
878 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
879 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
880 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
881 vnn->ifaces[num] = talloc_strdup(vnn, iface);
882 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
886 vnn->ifaces[num] = NULL;
887 vnn->public_address = *addr;
888 vnn->public_netmask_bits = mask;
891 for (i=0; vnn->ifaces[i]; i++) {
892 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
894 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
895 "for public_address[%s]\n",
896 vnn->ifaces[i], ctdb_addr_to_str(addr)));
902 DLIST_ADD(ctdb->vnn, vnn);
908 setup the event script directory
910 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
912 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
913 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
918 setup the public address lists from a file
920 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
926 lines = file_lines_load(alist, &nlines, ctdb);
928 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
931 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
935 for (i=0;i<nlines;i++) {
943 while ((*line == ' ') || (*line == '\t')) {
949 if (strcmp(line, "") == 0) {
952 tok = strtok(line, " \t");
954 tok = strtok(NULL, " \t");
956 if (NULL == ctdb->default_public_interface) {
957 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
962 ifaces = ctdb->default_public_interface;
967 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
968 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
972 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
973 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
983 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
987 struct ctdb_vnn *svnn;
991 svnn = talloc_zero(ctdb, struct ctdb_vnn);
992 CTDB_NO_MEMORY(ctdb, svnn);
994 svnn->ifaces = talloc_array(svnn, const char *, 2);
995 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
996 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
997 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
998 svnn->ifaces[1] = NULL;
1000 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1006 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1008 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1009 "for single_ip[%s]\n",
1011 ctdb_addr_to_str(&svnn->public_address)));
1016 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1022 ctdb->single_ip_vnn = svnn;
1026 struct ctdb_public_ip_list {
1027 struct ctdb_public_ip_list *next;
1029 ctdb_sock_addr addr;
1033 /* Given a physical node, return the number of
1034 public addresses that are currently assigned to this node.
1036 static int node_ip_coverage(struct ctdb_context *ctdb,
1038 struct ctdb_public_ip_list *ips)
1042 for (;ips;ips=ips->next) {
1043 if (ips->pnn == pnn) {
1051 /* Check if this is a public ip known to the node, i.e. can that
1052 node take over this ip ? Callers treat a non-zero return as "can not serve".
1054 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1055 struct ctdb_public_ip_list *ip)
1057 struct ctdb_all_public_ips *public_ips;
1060 public_ips = ctdb->nodes[pnn]->available_public_ips;
1062 if (public_ips == NULL) {
1066 for (i=0;i<public_ips->num;i++) {
1067 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1068 /* yes, this node can serve this public ip */
1077 /* search the node list for a node to take over this ip.
1078 pick the node that is currently serving the fewest ips
1079 so that the ips get spread out evenly.
1081 static int find_takeover_node(struct ctdb_context *ctdb,
1082 struct ctdb_node_map *nodemap, uint32_t mask,
1083 struct ctdb_public_ip_list *ip,
1084 struct ctdb_public_ip_list *all_ips)
1086 int pnn, min=0, num;
1090 for (i=0;i<nodemap->num;i++) {
1091 if (nodemap->nodes[i].flags & mask) {
1092 /* This node is not healthy and can not be used to serve
1098 /* verify that this node can serve this ip */
1099 if (can_node_serve_ip(ctdb, i, ip)) {
1100 /* no it couldnt so skip to the next node */
1104 num = node_ip_coverage(ctdb, i, all_ips);
1105 /* was this the first node we checked ? */
1117 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1118 ctdb_addr_to_str(&ip->addr)));
1128 static uint32_t *ip_key(ctdb_sock_addr *ip)
1130 static uint32_t key[IP_KEYLEN];
1132 bzero(key, sizeof(key));
1134 switch (ip->sa.sa_family) {
1136 key[3] = htonl(ip->ip.sin_addr.s_addr);
1139 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1140 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1141 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1142 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1145 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1152 static void *add_ip_callback(void *parm, void *data)
1157 void getips_count_callback(void *param, void *data)
1159 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1160 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1162 new_ip->next = *ip_list;
1166 static struct ctdb_public_ip_list *
1167 create_merged_ip_list(struct ctdb_context *ctdb)
1170 struct ctdb_public_ip_list *ip_list;
1171 struct ctdb_all_public_ips *public_ips;
1173 if (ctdb->ip_tree != NULL) {
1174 talloc_free(ctdb->ip_tree);
1175 ctdb->ip_tree = NULL;
1177 ctdb->ip_tree = trbt_create(ctdb, 0);
1179 for (i=0;i<ctdb->num_nodes;i++) {
1180 public_ips = ctdb->nodes[i]->known_public_ips;
1182 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1186 /* there were no public ips for this node */
1187 if (public_ips == NULL) {
1191 for (j=0;j<public_ips->num;j++) {
1192 struct ctdb_public_ip_list *tmp_ip;
1194 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1195 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1196 tmp_ip->pnn = public_ips->ips[j].pnn;
1197 tmp_ip->addr = public_ips->ips[j].addr;
1198 tmp_ip->next = NULL;
1200 trbt_insertarray32_callback(ctdb->ip_tree,
1201 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1208 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1214 make any IP alias changes for public addresses that are necessary
1216 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1218 int i, num_healthy, retries, num_ips;
1219 struct ctdb_public_ip ip;
1220 struct ctdb_public_ipv4 ipv4;
1221 uint32_t mask, *nodes;
1222 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1223 int maxnode, maxnum=0, minnode, minnum=0, num;
1225 struct timeval timeout;
1226 struct client_async_data *async_data;
1227 struct ctdb_client_control_state *state;
1228 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1233 /* Count how many completely healthy nodes we have */
1235 for (i=0;i<nodemap->num;i++) {
1236 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1241 if (num_healthy > 0) {
1242 /* We have healthy nodes, so only consider them for
1243 serving public addresses
1245 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1247 /* We didnt have any completely healthy nodes so
1248 use "disabled" nodes as a fallback
1250 mask = NODE_FLAGS_INACTIVE;
1253 /* since nodes only know about those public addresses that
1254 can be served by that particular node, no single node has
1255 a full list of all public addresses that exist in the cluster.
1256 Walk over all node structures and create a merged list of
1257 all public addresses that exist in the cluster.
1259 keep the tree of ips around as ctdb->ip_tree
1261 all_ips = create_merged_ip_list(ctdb);
1263 /* If we want deterministic ip allocations, i.e. that the ip addresses
1264 will always be allocated the same way for a specific set of
1265 available/unavailable nodes.
1267 if (1 == ctdb->tunable.deterministic_public_ips) {
1268 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1269 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1270 tmp_ip->pnn = i%nodemap->num;
1275 /* mark all public addresses with a masked node as being served by
1278 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1279 if (tmp_ip->pnn == -1) {
1282 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1287 /* verify that the assigned nodes can serve that public ip
1288 and set it to -1 if not
1290 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1291 if (tmp_ip->pnn == -1) {
1294 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1295 /* this node can not serve this ip. */
1301 /* now we must redistribute all public addresses with takeover node
1302 -1 among the nodes available
1306 /* loop over all ip's and find a physical node to cover for
1309 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1310 if (tmp_ip->pnn == -1) {
1311 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1312 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1313 ctdb_addr_to_str(&tmp_ip->addr)));
1318 /* If we don't want ips to fail back after a node becomes healthy
1319 again, we won't even try to reallocate the ip addresses so that
1320 they are evenly spread out.
1321 This can NOT be used at the same time as DeterministicIPs !
1323 if (1 == ctdb->tunable.no_ip_failback) {
1324 if (1 == ctdb->tunable.deterministic_public_ips) {
1325 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1331 /* now, try to make sure the ip addresses are evenly distributed
1333 for each ip address, loop over all nodes that can serve this
1334 ip and make sure that the difference between the node
1335 serving the most and the node serving the fewest ips is not greater
1338 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1339 if (tmp_ip->pnn == -1) {
1343 /* Get the highest and lowest number of ips served by any
1344 valid node which can serve this ip.
1348 for (i=0;i<nodemap->num;i++) {
1349 if (nodemap->nodes[i].flags & mask) {
1353 /* only check nodes that can actually serve this ip */
1354 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1355 /* no it couldnt so skip to the next node */
1359 num = node_ip_coverage(ctdb, i, all_ips);
1360 if (maxnode == -1) {
1369 if (minnode == -1) {
1379 if (maxnode == -1) {
1380 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1381 ctdb_addr_to_str(&tmp_ip->addr)));
1386 /* If we want deterministic IPs then dont try to reallocate
1387 them to spread out the load.
1389 if (1 == ctdb->tunable.deterministic_public_ips) {
1393 /* if the spread between the smallest and largest coverage by
1394 a node is >=2 we steal one of the ips from the node with
1395 most coverage to even things out a bit.
1396 try to do this at most 5 times since we dont want to spend
1397 too much time balancing the ip coverage.
1399 if ( (maxnum > minnum+1)
1401 struct ctdb_public_ip_list *tmp;
1403 /* mark one of maxnode's vnn's as unassigned and try
1406 for (tmp=all_ips;tmp;tmp=tmp->next) {
1407 if (tmp->pnn == maxnode) {
1417 /* finished distributing the public addresses, now just send the
1418 info out to the nodes
1422 /* at this point ->pnn is the node which will own each IP
1423 or -1 if there is no node that can cover this ip
1426 /* now tell all nodes to delete any alias that they should not
1427 have. This will be a NOOP on nodes that don't currently
1428 hold the given alias */
1429 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1430 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1432 for (i=0;i<nodemap->num;i++) {
1433 /* don't talk to unconnected nodes, but do talk to banned nodes */
1434 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1438 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1439 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1440 /* This node should be serving this
1441 vnn so dont tell it to release the ip
1445 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1446 ipv4.pnn = tmp_ip->pnn;
1447 ipv4.sin = tmp_ip->addr.ip;
1449 timeout = TAKEOVER_TIMEOUT();
1450 data.dsize = sizeof(ipv4);
1451 data.dptr = (uint8_t *)&ipv4;
1452 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1453 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1457 ip.pnn = tmp_ip->pnn;
1458 ip.addr = tmp_ip->addr;
1460 timeout = TAKEOVER_TIMEOUT();
1461 data.dsize = sizeof(ip);
1462 data.dptr = (uint8_t *)&ip;
1463 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1464 0, CTDB_CONTROL_RELEASE_IP, 0,
1469 if (state == NULL) {
1470 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1471 talloc_free(tmp_ctx);
1475 ctdb_client_async_add(async_data, state);
1478 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1479 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1480 talloc_free(tmp_ctx);
1483 talloc_free(async_data);
1486 /* tell all nodes to get their own IPs */
1487 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1490 if (tmp_ip->pnn == -1) {
1491 /* this IP won't be taken over */
1495 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1496 ipv4.pnn = tmp_ip->pnn;
1497 ipv4.sin = tmp_ip->addr.ip;
1499 timeout = TAKEOVER_TIMEOUT();
1500 data.dsize = sizeof(ipv4);
1501 data.dptr = (uint8_t *)&ipv4;
1502 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1503 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1507 ip.pnn = tmp_ip->pnn;
1508 ip.addr = tmp_ip->addr;
1510 timeout = TAKEOVER_TIMEOUT();
1511 data.dsize = sizeof(ip);
1512 data.dptr = (uint8_t *)&ip;
1513 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1514 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1518 if (state == NULL) {
1519 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1520 talloc_free(tmp_ctx);
1524 ctdb_client_async_add(async_data, state);
1526 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1527 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1528 talloc_free(tmp_ctx);
1532 /* tell all nodes to update natwg */
1533 /* send the flags update natgw on all connected nodes */
1534 data.dptr = discard_const("ipreallocated");
1535 data.dsize = strlen((char *)data.dptr) + 1;
1536 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1537 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1538 nodes, 0, TAKEOVER_TIMEOUT(),
1542 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1545 talloc_free(tmp_ctx);
// Destructor installed with talloc_set_destructor() by ctdb_control_tcp_client.
// It logs the client address and then unlinks the record from the
// per-context client_ip_list.
1551 destroy a ctdb_client_ip structure
1553 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
// NOTE(review): the port is always read through the IPv4 member (ip.sin_port),
// whatever the address family of ip->addr is.
1555 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1556 ctdb_addr_to_str(&ip->addr),
1557 ntohs(ip->addr.ip.sin_port),
// Unlink from the list the record was added to at registration time.
1560 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
// Control handler: a local client reports a TCP connection it owns so the
// connection can be tickled after an IP takeover. The connection is recorded
// on the client and announced to all connected nodes via CTDB_CONTROL_TCP_ADD.
// NOTE(review): short lines (braces, returns, case labels) are not visible in
// this listing, so the control flow between the statements below is assumed.
1565 called by a client to inform us of a TCP connection that it is managing
1566 that should tickled with an ACK when IP takeover is done
1567 we handle both the old ipv4 style of packets as well as the new ipv4/6
1570 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
// NOTE(review): client is dereferenced later (client->pid) and used as a
// talloc parent; no NULL check of this lookup is visible here - confirm.
1573 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1574 struct ctdb_control_tcp *old_addr = NULL;
1575 struct ctdb_control_tcp_addr new_addr;
1576 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1577 struct ctdb_tcp_list *tcp;
1578 struct ctdb_tcp_connection t;
1581 struct ctdb_client_ip *ip;
1582 struct ctdb_vnn *vnn;
1583 ctdb_sock_addr addr;
// The payload size selects the wire format. The legacy IPv4-only struct is
// converted into a zeroed ctdb_control_tcp_addr; the new struct is used in place.
1585 switch (indata.dsize) {
1586 case sizeof(struct ctdb_control_tcp):
1587 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1588 ZERO_STRUCT(new_addr);
1589 tcp_sock = &new_addr;
1590 tcp_sock->src.ip = old_addr->src;
1591 tcp_sock->dest.ip = old_addr->dest;
1593 case sizeof(struct ctdb_control_tcp_addr):
1594 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
// Any other size is reported as an error (presumably the default case).
1597 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1598 "to ctdb_control_tcp_client. size was %d but "
1599 "only allowed sizes are %lu and %lu\n",
1601 (long unsigned)sizeof(struct ctdb_control_tcp),
1602 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
// Canonicalize both endpoints. A temporary copy is the input because the
// result is written back into tcp_sock itself.
1606 addr = tcp_sock->src;
1607 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1608 addr = tcp_sock->dest;
1609 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
// Look up the public address entry (vnn) for the destination address.
1612 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1613 vnn = find_public_ip_vnn(ctdb, &addr);
// Per-family diagnostics, presumably reached only when no vnn was found (the
// guarding condition is not visible). IPv4 loopback destinations are not logged.
1615 switch (addr.sa.sa_family) {
1617 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1618 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1619 ctdb_addr_to_str(&addr)));
1623 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1624 ctdb_addr_to_str(&addr)));
1627 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
// The registration is refused when the public address is assigned to a
// different node than this one.
1633 if (vnn->pnn != ctdb->pnn) {
1634 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1635 ctdb_addr_to_str(&addr),
1636 client_id, client->pid));
1637 /* failing this call will tell smbd to die */
// Track the address on the client. The record is a talloc child of client and
// its destructor unlinks it from ctdb->client_ip_list.
1641 ip = talloc(client, struct ctdb_client_ip);
1642 CTDB_NO_MEMORY(ctdb, ip);
1646 ip->client_id = client_id;
1647 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1648 DLIST_ADD(ctdb->client_ip_list, ip);
// Remember the connection on the client's own tcp_list.
1650 tcp = talloc(client, struct ctdb_tcp_list);
1651 CTDB_NO_MEMORY(ctdb, tcp);
1653 tcp->connection.src_addr = tcp_sock->src;
1654 tcp->connection.dst_addr = tcp_sock->dest;
1656 DLIST_ADD(client->tcp_list, tcp);
// Build the src/dst pair that is sent to the other nodes.
1658 t.src_addr = tcp_sock->src;
1659 t.dst_addr = tcp_sock->dest;
1661 data.dptr = (uint8_t *)&t;
1662 data.dsize = sizeof(t);
// Log the registration; the port is read from the family-specific member.
1664 switch (addr.sa.sa_family) {
1666 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1667 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1668 ctdb_addr_to_str(&tcp_sock->src),
1669 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1672 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1673 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1674 ctdb_addr_to_str(&tcp_sock->src),
1675 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1678 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
// The broadcast is sent with CTDB_CTRL_FLAG_NOREPLY, so no reply is requested.
1682 /* tell all nodes about this tcp connection */
1683 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1684 CTDB_CONTROL_TCP_ADD,
1685 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1687 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
// Linear search of a tickle array for the entry whose source AND destination
// socket addresses both match tcp. On a match a pointer into
// array->connections is returned. Callers in this file compare the result
// against NULL, so the no-match result is presumably NULL (not visible here).
1695 find a tcp address on a list
1697 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1698 struct ctdb_tcp_connection *tcp)
// A missing array is handled before the loop.
1702 if (array == NULL) {
1706 for (i=0;i<array->num;i++) {
1707 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1708 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1709 return &array->connections[i];
// Control handler: records one tickle (src/dst pair taken from indata) in the
// tcp_array of the public address matching p->dst_addr. When
// tcp_update_needed is true the vnn is flagged with tcp_update_needed, the
// flag that ctdb_update_tcp_tickles checks before sending the list out.
// NOTE(review): no check of indata.dsize is visible before indata.dptr is
// used as a struct ctdb_tcp_connection - confirm against the hidden lines.
1718 called by a daemon to inform us of a TCP connection that one of its
1719 clients managing that should tickled with an ACK when IP takeover is
1722 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1724 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1725 struct ctdb_tcp_array *tcparray;
1726 struct ctdb_tcp_connection tcp;
1727 struct ctdb_vnn *vnn;
// Destinations that are not public addresses are only logged at INFO level.
1729 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1731 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1732 ctdb_addr_to_str(&p->dst_addr)));
1738 tcparray = vnn->tcp_array;
// NOTE(review): the new array is parented to ctdb->nodes here, while
// ctdb_control_set_tcp_tickle_list reparents its array to the vnn.
1740 /* If this is the first tickle */
1741 if (tcparray == NULL) {
1742 tcparray = talloc_size(ctdb->nodes,
1743 offsetof(struct ctdb_tcp_array, connections) +
1744 sizeof(struct ctdb_tcp_connection) * 1);
1745 CTDB_NO_MEMORY(ctdb, tcparray);
1746 vnn->tcp_array = tcparray;
// Storage for exactly one connection, owned by the array header.
1749 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1750 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1752 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1753 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1756 if (tcp_update_needed) {
1757 vnn->tcp_update_needed = true;
// Duplicates are detected with ctdb_tcp_find and only logged.
1763 /* Do we already have this tickle ?*/
1764 tcp.src_addr = p->src_addr;
1765 tcp.dst_addr = p->dst_addr;
1766 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1767 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1768 ctdb_addr_to_str(&tcp.dst_addr),
1769 ntohs(tcp.dst_addr.ip.sin_port),
// NOTE(review): the realloc result is stored straight into
// tcparray->connections, so on failure the previous pointer is overwritten
// while tcparray->num still counts the old entries.
1774 /* A new tickle, we must add it to the array */
1775 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1776 struct ctdb_tcp_connection,
1778 CTDB_NO_MEMORY(ctdb, tcparray->connections);
// Append the new pair at index num (the increment itself is not visible).
1780 vnn->tcp_array = tcparray;
1781 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1782 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1785 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1786 ctdb_addr_to_str(&tcp.dst_addr),
1787 ntohs(tcp.dst_addr.ip.sin_port),
1790 if (tcp_update_needed) {
1791 vnn->tcp_update_needed = true;
// Removes one tickle (conn) from the tcp_array of the public address matching
// conn->dst_addr and flags the vnn with tcp_update_needed.
// NOTE(review): the two header lines below duplicate the text used for
// ctdb_control_tcp_add; this function performs the removal, not the add.
1799 called by a daemon to inform us of a TCP connection that one of its
1800 clients managing that should tickled with an ACK when IP takeover is
1803 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1805 struct ctdb_tcp_connection *tcpp;
1806 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
// Unknown public address: logged as an error (presumably followed by a return).
1809 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1810 ctdb_addr_to_str(&conn->dst_addr)));
1814 /* if the array is empty we cant remove it
1815 and we dont need to do anything
1817 if (vnn->tcp_array == NULL) {
1818 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1819 ctdb_addr_to_str(&conn->dst_addr),
1820 ntohs(conn->dst_addr.ip.sin_port)));
1825 /* See if we know this connection
1826 if we dont know this connection then we dont need to do anything
1828 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1830 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1831 ctdb_addr_to_str(&conn->dst_addr),
1832 ntohs(conn->dst_addr.ip.sin_port)));
// Removal overwrites the found slot with the last element and shrinks the
// count, so the order of the remaining entries is not preserved.
1837 /* We need to remove this entry from the array.
1838 Instead of allocating a new array and copying data to it
1839 we cheat and just copy the last entry in the existing array
1840 to the entry that is to be removed and just shring the
1843 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1844 vnn->tcp_array->num--;
1846 /* If we deleted the last entry we also need to remove the entire array
1848 if (vnn->tcp_array->num == 0) {
1849 talloc_free(vnn->tcp_array);
1850 vnn->tcp_array = NULL;
1853 vnn->tcp_update_needed = true;
// NOTE(review): this message prints the source address and port, whereas the
// messages above print the destination.
1855 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1856 ctdb_addr_to_str(&conn->src_addr),
1857 ntohs(conn->src_addr.ip.sin_port)));
// Control handler: interprets indata as one struct ctdb_tcp_connection and
// passes it to ctdb_remove_tcp_connection.
// NOTE(review): no validation of indata.dsize is visible in this listing.
1862 called by a daemon to inform us of a TCP connection that one of its
1863 clients used are no longer needed in the tickle database
1865 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1867 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1869 ctdb_remove_tcp_connection(ctdb, conn);
// Control handler for a node startup notification. The only visible body line
// is the XXX note below: sending the tickles to the new node is described
// there as work still to be done.
1876 called when a daemon restarts - send all tickes for all public addresses
1877 we are serving immediately to the new node.
1879 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1881 /*XXX here we should send all tickes we are serving to the new node */
// Drains client->tcp_list: every entry is unlinked from the list and its
// connection is removed from the tickle array of the matching public address.
// The entries themselves are not freed in the visible lines; they are
// allocated as talloc children of the client in ctdb_control_tcp_client.
1887 called when a client structure goes away - hook to remove
1888 elements from the tcp_list in all daemons
1890 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1892 while (client->tcp_list) {
1893 struct ctdb_tcp_list *tcp = client->tcp_list;
1894 DLIST_REMOVE(client->tcp_list, tcp);
1895 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
// Walks every public address (ctdb->vnn). Addresses that ctdb_sys_have_ip
// reports as not present only get their interface unassigned. For the others
// the CTDB_EVENT_RELEASE_IP event script is run with
// "<iface> <address> <netmask bits>", release_kill_clients is called for the
// address, and the interface is unassigned.
1901 release all IPs on shutdown
1903 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1905 struct ctdb_vnn *vnn;
1907 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Address not present: presumably the loop continues after this call (the
// continue statement is not visible in this listing).
1908 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1909 ctdb_vnn_unassign_iface(ctdb, vnn);
// The interface string is read before the interface is unassigned below.
1915 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1916 ctdb_vnn_iface_string(vnn),
1917 ctdb_addr_to_str(&vnn->public_address),
1918 vnn->public_netmask_bits);
1919 release_kill_clients(ctdb, &vnn->public_address);
1920 ctdb_vnn_unassign_iface(ctdb, vnn);
// Control handler: returns the public addresses known to this node as a
// struct ctdb_all_public_ips in outdata. With the
// CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE request flag, addresses for which
// ctdb_vnn_available() is false are left out.
1926 get list of public IPs
1928 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1929 struct ctdb_req_control *c, TDB_DATA *outdata)
1932 struct ctdb_all_public_ips *ips;
1933 struct ctdb_vnn *vnn;
1934 bool only_available = false;
1936 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1937 only_available = true;
// First pass: count all entries to size the allocation (the counter
// statements are not visible in this listing).
1940 /* count how many public ip structures we have */
1942 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// The buffer is zero-initialized and allocated as a talloc child of outdata.
1946 len = offsetof(struct ctdb_all_public_ips, ips) +
1947 num*sizeof(struct ctdb_public_ip);
1948 ips = talloc_zero_size(outdata, len);
1949 CTDB_NO_MEMORY(ctdb, ips);
// Second pass: fill in the entries, applying the availability filter.
1952 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1953 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1956 ips->ips[i].pnn = vnn->pnn;
1957 ips->ips[i].addr = vnn->public_address;
// The reported size is recomputed from i, the number of entries written,
// which can be smaller than the allocated size when entries were filtered.
1961 len = offsetof(struct ctdb_all_public_ips, ips) +
1962 i*sizeof(struct ctdb_public_ip);
1964 outdata->dsize = len;
1965 outdata->dptr = (uint8_t *)ips;
// Legacy variant of the public-IP listing: the reply is a
// struct ctdb_all_public_ipsv4 and both loops test the address family
// against AF_INET, so only IPv4 public addresses are reported.
1972 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1974 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1975 struct ctdb_req_control *c, TDB_DATA *outdata)
1978 struct ctdb_all_public_ipsv4 *ips;
1979 struct ctdb_vnn *vnn;
// First pass: count the entries (the counter statements are not visible).
1981 /* count how many public ip structures we have */
1983 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1984 if (vnn->public_address.sa.sa_family != AF_INET) {
// The reply buffer is zero-initialized and parented to outdata.
1990 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1991 num*sizeof(struct ctdb_public_ipv4);
1992 ips = talloc_zero_size(outdata, len);
1993 CTDB_NO_MEMORY(ctdb, ips);
1995 outdata->dsize = len;
1996 outdata->dptr = (uint8_t *)ips;
// Second pass: copy the owner pnn and the sockaddr_in of each entry.
2000 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2001 if (vnn->public_address.sa.sa_family != AF_INET) {
2004 ips->ips[i].pnn = vnn->pnn;
2005 ips->ips[i].sin = vnn->public_address.ip;
// Control handler: for the address passed in indata, returns a
// struct ctdb_control_public_ip_info holding the address, its owner pnn, the
// interfaces configured for it and the index of the active interface.
// NOTE(review): no check of indata.dsize is visible before indata.dptr is
// used as a ctdb_sock_addr.
2012 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2013 struct ctdb_req_control *c,
2018 ctdb_sock_addr *addr;
2019 struct ctdb_control_public_ip_info *info;
2020 struct ctdb_vnn *vnn;
2022 addr = (ctdb_sock_addr *)indata.dptr;
2024 vnn = find_public_ip_vnn(ctdb, addr);
// Fallback lookup against the single_ip_vnn when one is configured.
2026 /* if it is not a public ip it could be our 'single ip' */
2027 if (ctdb->single_ip_vnn) {
2028 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2029 vnn = ctdb->single_ip_vnn;
2034 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2035 "'%s'not a public address\n",
2036 ctdb_addr_to_str(addr)));
// vnn->ifaces is walked until a NULL entry, i.e. it is treated as a
// NULL-terminated array of interface names.
2040 /* count how many public ip structures we have */
2042 for (;vnn->ifaces[num];) {
2046 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2047 num*sizeof(struct ctdb_control_iface_info);
2048 info = talloc_zero_size(outdata, len);
2049 CTDB_NO_MEMORY(ctdb, info);
// active_idx starts as 0xFFFFFFFF and is only overwritten when one of the
// listed interfaces is the one currently assigned to the vnn.
2051 info->ip.addr = vnn->public_address;
2052 info->ip.pnn = vnn->pnn;
2053 info->active_idx = 0xFFFFFFFF;
2055 for (i=0; vnn->ifaces[i]; i++) {
2056 struct ctdb_iface *cur;
2058 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2060 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2064 if (vnn->iface == cur) {
2065 info->active_idx = i;
// NOTE(review): strcpy has no bound of its own; this relies on cur->name
// fitting into ifaces[i].name - confirm against the struct declaration.
2067 strcpy(info->ifaces[i].name, cur->name);
2068 info->ifaces[i].link_state = cur->link_up;
2069 info->ifaces[i].references = cur->references;
// The reported size is recomputed from the number of interfaces written.
2072 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2073 i*sizeof(struct ctdb_control_iface_info);
2075 outdata->dsize = len;
2076 outdata->dptr = (uint8_t *)info;
// Control handler: returns name, link state and reference count of every
// entry in ctdb->ifaces as a struct ctdb_control_get_ifaces in outdata.
2081 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2082 struct ctdb_req_control *c,
2086 struct ctdb_control_get_ifaces *ifaces;
2087 struct ctdb_iface *cur;
// First pass counts the interfaces; the comment text below says "public ip"
// but the loop walks ctdb->ifaces.
2089 /* count how many public ip structures we have */
2091 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2095 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2096 num*sizeof(struct ctdb_control_iface_info);
2097 ifaces = talloc_zero_size(outdata, len);
2098 CTDB_NO_MEMORY(ctdb, ifaces);
// NOTE(review): strcpy has no bound of its own; this relies on cur->name
// fitting into ifaces[i].name - confirm against the struct declaration.
2101 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2102 strcpy(ifaces->ifaces[i].name, cur->name);
2103 ifaces->ifaces[i].link_state = cur->link_up;
2104 ifaces->ifaces[i].references = cur->references;
// The reported size is recomputed from the number of entries written.
2108 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2109 i*sizeof(struct ctdb_control_iface_info);
2111 outdata->dsize = len;
2112 outdata->dptr = (uint8_t *)ifaces;
// Control handler: updates the link_up flag of the interface named in the
// request after validating the name terminator, the link_state value and
// that the references field is zero.
2117 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2118 struct ctdb_req_control *c,
2121 struct ctdb_control_iface_info *info;
2122 struct ctdb_iface *iface;
2123 bool link_up = false;
2125 info = (struct ctdb_control_iface_info *)indata.dptr;
// The name must carry a NUL at index CTDB_IFACE_SIZE.
// NOTE(review): assumes name[] holds more than CTDB_IFACE_SIZE bytes -
// confirm against the struct declaration.
2127 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2128 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2129 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2130 len, len, info->name));
// The case labels that map link_state to link_up are not visible here;
// unrecognized values are reported as invalid.
2134 switch (info->link_state) {
2142 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2143 (unsigned int)info->link_state));
2147 if (info->references != 0) {
2148 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2149 (unsigned int)info->references));
2153 iface = ctdb_find_iface(ctdb, info->name);
2154 if (iface == NULL) {
2155 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
// No state change requested: nothing to log or update.
2160 if (link_up == iface->link_up) {
// The log level is chosen from the OLD state: an interface that was up (and
// is therefore going down) is logged at ERR, the opposite change at NOTICE.
2164 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2165 ("iface[%s] has changed it's link status %s => %s\n",
2167 iface->link_up?"up":"down",
2168 link_up?"up":"down"));
2170 iface->link_up = link_up;
// Per-public-address state for killing TCP connections. Code in this file
// also uses capture_fd and private_data members whose declarations are not
// visible in this listing.
2176 structure containing the listening socket and the list of tcp connections
2177 that the ctdb daemon is to kill
2179 struct ctdb_kill_tcp {
// Back pointer; the destructor clears vnn->killtcp through it.
2180 struct ctdb_vnn *vnn;
2181 struct ctdb_context *ctdb;
// fd event registered for the capture socket (handler: capture_tcp_handler).
2183 struct fd_event *fde;
// Tree of struct ctdb_killtcp_con entries, keyed by killtcp_key().
2184 trbt_tree_t *connections;
// One connection scheduled to be reset. A count member used by
// tickle_connection_traverse is not visible in this listing.
2189 a tcp connection that is to be killed
2191 struct ctdb_killtcp_con {
2192 ctdb_sock_addr src_addr;
2193 ctdb_sock_addr dst_addr;
// Owning kill state; also the talloc parent at creation time.
2195 struct ctdb_kill_tcp *killtcp;
// Builds the KILLTCP_KEYLEN-word lookup key for a socket pair.
// The key lives in a function-local static array that is zeroed and refilled
// on every call, so each call overwrites the previous key and the function is
// not reentrant. The value returned is presumably that array (the return
// statements are not visible in this listing).
2198 /* this function is used to create a key to represent this socketpair
2199 in the killtcp tree.
2200 this key is used to insert and lookup matching socketpairs that are
2201 to be tickled and RST
2203 #define KILLTCP_KEYLEN 10
2204 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2206 static uint32_t key[KILLTCP_KEYLEN];
2208 bzero(key, sizeof(key));
// Both endpoints must belong to the same address family.
2210 if (src->sa.sa_family != dst->sa.sa_family) {
2211 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2215 switch (src->sa.sa_family) {
// IPv4 layout: addresses and ports fill words 0-3; words 4-9 stay zero.
2217 key[0] = dst->ip.sin_addr.s_addr;
2218 key[1] = src->ip.sin_addr.s_addr;
2219 key[2] = dst->ip.sin_port;
2220 key[3] = src->ip.sin_port;
// IPv6 layout: the four 32-bit address words of dst and src are interleaved,
// last word first, followed by the two ports.
2223 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2224 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2225 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2226 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2227 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2228 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2229 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2230 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2231 key[8] = dst->ip6.sin6_port;
2232 key[9] = src->ip6.sin6_port;
2235 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
// fd-event handler for the capture socket. It parses one captured packet,
// looks the socket pair up in killtcp->connections and, when it belongs to a
// connection scheduled for killing, sends a TCP packet built from the
// captured sequence numbers (described as a reset in the log message).
2243 called when we get a read event on the raw socket
2245 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2246 uint16_t flags, void *private_data)
2248 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2249 struct ctdb_killtcp_con *con;
2250 ctdb_sock_addr src, dst;
2251 uint32_t ack_seq, seq;
// Only read events are of interest.
2253 if (!(flags & EVENT_FD_READ)) {
2257 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2258 killtcp->private_data,
2260 &ack_seq, &seq) != 0) {
2261 /* probably a non-tcp ACK packet */
// The key is built as (src, dst) of the captured packet, while
// ctdb_killtcp_add_connection inserts with (dst_addr, src_addr) of the stored
// connection - presumably because the captured packet travels in the
// opposite direction; confirm.
2265 /* check if we have this guy in our list of connections
2268 con = trbt_lookuparray32(killtcp->connections,
2269 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2271 /* no this was some other packet we can just ignore */
2275 /* This one has been tickled !
2276 now reset him and remove him from the list.
2278 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2279 ntohs(con->dst_addr.ip.sin_port),
2280 ctdb_addr_to_str(&con->src_addr),
2281 ntohs(con->src_addr.ip.sin_port)));
// The final argument 1 presumably requests the RST flag - confirm against
// ctdb_sys_send_tcp.
2283 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
// Tree-traversal callback run for every connection still waiting to be
// killed. param is the temporary talloc context created by
// ctdb_tickle_sentenced_connections; data is the connection.
2288 /* when traversing the list of all tcp connections to send tickle acks to
2289 (so that we can capture the ack coming back and kill the connection
2291 this callback is called for each connection we are currently trying to kill
2293 static void tickle_connection_traverse(void *param, void *data)
2295 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
// After 5 attempts the connection is handed over to param, so that it is
// freed by the caller once the traversal has finished.
2297 /* have tried too many times, just give up */
2298 if (con->count >= 5) {
2299 /* can't delete in traverse: reparent to delete_cons */
2300 talloc_steal(param, con);
// The call these two arguments belong to is not visible in this listing
// (presumably the tickle is sent with ctdb_sys_send_tcp).
2304 /* othervise, try tickling it again */
2307 (ctdb_sock_addr *)&con->dst_addr,
2308 (ctdb_sock_addr *)&con->src_addr,
// Timed-event handler that tickles every pending connection, frees the ones
// that were given up on, and either tears the kill state down (no
// connections left) or re-arms itself one second later.
2314 called every second until all sentenced connections have been reset
2316 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2317 struct timeval t, void *private_data)
2319 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
// Parentless temporary context that collects the connections to delete.
2320 void *delete_cons = talloc_new(NULL);
2322 /* loop over all connections sending tickle ACKs */
2323 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2325 /* now we've finished traverse, it's safe to do deletion. */
2326 talloc_free(delete_cons);
// Freeing killtcp runs ctdb_killtcp_destructor, which clears vnn->killtcp.
2328 /* If there are no more connections to kill we can remove the
2329 entire killtcp structure
2331 if ( (killtcp->connections == NULL) ||
2332 (killtcp->connections->root == NULL) ) {
2333 talloc_free(killtcp);
// The new timer is a talloc child of killtcp, so it goes away with it.
2337 /* try tickling them again in a seconds time
2339 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2340 ctdb_tickle_sentenced_connections, killtcp);
// Destructor installed by ctdb_killtcp_add_connection: clears the owning
// vnn's pointer to this kill state so no dangling reference stays behind.
2344 destroy the killtcp structure
2346 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2349 killtcp->vnn->killtcp = NULL;
// Insert callback passed to trbt_insertarray32_callback by
// ctdb_killtcp_add_connection, which supplies the new connection as parm.
// The function body is not visible in this listing; going by the comment
// text below it presumably returns parm so the new entry replaces any old one.
2355 /* nothing fancy here, just unconditionally replace any existing
2356 connection structure with the new one.
2358 dont even free the old one if it did exist, that one is talloc_stolen
2359 by the same node in the tree anyway and will be deleted when the new data
2362 static void *add_killtcp_callback(void *parm, void *data)
// Schedules one TCP connection (s -> d) to be reset. It finds the public
// address the connection belongs to, lazily creates that address's kill
// state (tree, capture socket, fd event, 1-second timer) and inserts the
// connection into the tree.
// NOTE(review): braces, returns and some conditions are not visible in this
// listing, so the branch structure described below is partly assumed.
2368 add a tcp socket to the list of connections we want to RST
2370 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2374 ctdb_sock_addr src, dst;
2375 struct ctdb_kill_tcp *killtcp;
2376 struct ctdb_killtcp_con *con;
2377 struct ctdb_vnn *vnn;
// Work on canonicalized copies of both endpoints.
2379 ctdb_canonicalize_ip(s, &src);
2380 ctdb_canonicalize_ip(d, &dst);
// Try the destination first, then the source (presumably only when the
// first lookup found nothing).
2382 vnn = find_public_ip_vnn(ctdb, &dst);
2384 vnn = find_public_ip_vnn(ctdb, &src);
2387 /* if it is not a public ip it could be our 'single ip' */
2388 if (ctdb->single_ip_vnn) {
2389 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2390 vnn = ctdb->single_ip_vnn;
2395 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2399 killtcp = vnn->killtcp;
// The kill state is a child of ctdb; the tree is a child of the kill state.
2401 /* If this is the first connection to kill we must allocate
2404 if (killtcp == NULL) {
2405 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2406 CTDB_NO_MEMORY(ctdb, killtcp);
2409 killtcp->ctdb = ctdb;
2410 killtcp->capture_fd = -1;
2411 killtcp->connections = trbt_create(killtcp, 0);
2413 vnn->killtcp = killtcp;
2414 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2419 /* create a structure that describes this connection we want to
2420 RST and store it in killtcp->connections
2422 con = talloc(killtcp, struct ctdb_killtcp_con);
2423 CTDB_NO_MEMORY(ctdb, con);
2424 con->src_addr = src;
2425 con->dst_addr = dst;
2427 con->killtcp = killtcp;
// The key is built with (dst_addr, src_addr), the reverse of the argument
// order used for the lookup in capture_tcp_handler.
2430 trbt_insertarray32_callback(killtcp->connections,
2431 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2432 add_killtcp_callback, con);
2435 If we dont have a socket to listen on yet we must create it
2437 if (killtcp->capture_fd == -1) {
2438 const char *iface = ctdb_vnn_iface_string(vnn);
2439 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2440 if (killtcp->capture_fd == -1) {
2441 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2442 "socket on iface '%s' for killtcp (%s)\n",
2443 iface, strerror(errno)));
// First use: register the capture handler and let the fd event close the
// socket when the event is freed.
2449 if (killtcp->fde == NULL) {
2450 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2452 capture_tcp_handler, killtcp);
2453 tevent_fd_set_auto_close(killtcp->fde);
2455 /* We also need to set up some events to tickle all these connections
2456 until they are all reset
2458 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2459 ctdb_tickle_sentenced_connections, killtcp);
2462 /* tickle him once now */
// Presumably the failure path: the whole kill state of the vnn is dropped.
2471 talloc_free(vnn->killtcp);
2472 vnn->killtcp = NULL;
// Control handler: interprets indata as a struct ctdb_control_killtcp and
// schedules that connection for killing; the result of
// ctdb_killtcp_add_connection is returned unchanged.
// NOTE(review): no validation of indata.dsize is visible in this listing.
2477 kill a TCP connection.
2479 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2481 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2483 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
// Control handler: replaces the complete tickle list of one public address
// with the list carried in indata.
2487 called by a daemon to inform us of the entire list of TCP tickles for
2488 a particular public address.
2489 this control should only be sent by the node that is currently serving
2490 that public address.
2492 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2494 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2495 struct ctdb_tcp_array *tcparray;
2496 struct ctdb_vnn *vnn;
// Step 1: the fixed header must be complete before tickles.num is trusted.
2498 /* We must at least have tickles.num or else we cant verify the size
2499 of the received data blob
2501 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2502 tickles.connections)) {
2503 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
// Step 2: the blob must be at least large enough for num connections; a
// larger blob is accepted.
// NOTE(review): the size expression multiplies by a sender-supplied count;
// whether it can wrap depends on the operand types - confirm.
2507 /* verify that the size of data matches what we expect */
2508 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2509 tickles.connections)
2510 + sizeof(struct ctdb_tcp_connection)
2511 * list->tickles.num) {
2512 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2516 vnn = find_public_ip_vnn(ctdb, &list->addr);
2518 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2519 ctdb_addr_to_str(&list->addr)));
// The old list is dropped before the new one is built, so an allocation
// failure below leaves the vnn without any tickle list.
2524 /* remove any old ticklelist we might have */
2525 talloc_free(vnn->tcp_array);
2526 vnn->tcp_array = NULL;
2528 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2529 CTDB_NO_MEMORY(ctdb, tcparray);
2531 tcparray->num = list->tickles.num;
2533 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2534 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2536 memcpy(tcparray->connections, &list->tickles.connections[0],
2537 sizeof(struct ctdb_tcp_connection)*tcparray->num);
// The array was allocated under ctdb->nodes and is reparented to the vnn here.
2539 /* We now have a new fresh tickle list array for this vnn */
2540 vnn->tcp_array = talloc_steal(vnn, tcparray);
// Control handler: returns the tickle list of the public address passed in
// indata as a struct ctdb_control_tcp_tickle_list allocated under outdata.
// NOTE(review): no validation of indata.dsize is visible in this listing.
2546 called to return the full list of tickles for the puclic address associated
2547 with the provided vnn
2549 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2551 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2552 struct ctdb_control_tcp_tickle_list *list;
2553 struct ctdb_tcp_array *tcparray;
2555 struct ctdb_vnn *vnn;
2557 vnn = find_public_ip_vnn(ctdb, addr);
2559 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2560 ctdb_addr_to_str(addr)));
// vnn->tcp_array is NULL when there are no tickles (see
// ctdb_remove_tcp_connection); the guard around the next read is presumably
// on a line that is not visible here - confirm.
2565 tcparray = vnn->tcp_array;
2567 num = tcparray->num;
// Reply size: fixed header plus num connection records.
2572 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2573 tickles.connections)
2574 + sizeof(struct ctdb_tcp_connection) * num;
2576 outdata->dptr = talloc_size(outdata, outdata->dsize);
2577 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2578 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2581 list->tickles.num = num;
2583 memcpy(&list->tickles.connections[0], tcparray->connections,
2584 sizeof(struct ctdb_tcp_connection) * num);
// Serializes tcparray into a struct ctdb_control_tcp_tickle_list and sends it
// as CTDB_CONTROL_SET_TCP_TICKLE_LIST without requesting a reply.
// NOTE(review): in the visible lines the control always goes to
// CTDB_BROADCAST_CONNECTED; the timeout and destnode parameters do not
// appear to be used - confirm against the hidden lines.
2592 set the list of all tcp tickles for a public address
2594 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2595 struct timeval timeout, uint32_t destnode,
2596 ctdb_sock_addr *addr,
2597 struct ctdb_tcp_array *tcparray)
2601 struct ctdb_control_tcp_tickle_list *list;
// The guard for a NULL tcparray is presumably on a line not visible here.
2604 num = tcparray->num;
// Temporary payload buffer, allocated under ctdb and freed after sending.
2609 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2610 tickles.connections) +
2611 sizeof(struct ctdb_tcp_connection) * num;
2612 data.dptr = talloc_size(ctdb, data.dsize);
2613 CTDB_NO_MEMORY(ctdb, data.dptr);
2615 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2617 list->tickles.num = num;
2619 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2622 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2623 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2624 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2626 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2630 talloc_free(data.dptr);
// Periodic timed-event handler: for every public address held by this node
// whose tcp_update_needed flag is set, the tickle list is sent out. The
// handler then re-arms itself after tunable.tickle_update_interval seconds.
2637 perform tickle updates if required
2639 static void ctdb_update_tcp_tickles(struct event_context *ev,
2640 struct timed_event *te,
2641 struct timeval t, void *private_data)
2643 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2645 struct ctdb_vnn *vnn;
2647 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2648 /* we only send out updates for public addresses that
2651 if (ctdb->pnn != vnn->pnn) {
2654 /* We only send out the updates if we need to */
2655 if (!vnn->tcp_update_needed) {
// Some arguments of this call are on lines not visible in this listing.
2658 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2660 CTDB_BROADCAST_CONNECTED,
2661 &vnn->public_address,
2664 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2665 ctdb_addr_to_str(&vnn->public_address)));
// Re-arm under tickle_update_context, the context created by
// ctdb_start_tcp_tickle_update.
2669 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2670 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2671 ctdb_update_tcp_tickles, ctdb);
// Creates the talloc context that owns the tickle-update timer and schedules
// the first run of ctdb_update_tcp_tickles after
// tunable.tickle_update_interval seconds.
2676 start periodic update of tcp tickles
2678 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2680 ctdb->tickle_update_context = talloc_new(ctdb);
2682 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2683 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2684 ctdb_update_tcp_tickles, ctdb);
// State carried between repeated gratuitous ARP transmissions. Code in this
// file also uses iface and count members whose declarations are not visible
// in this listing.
2690 struct control_gratious_arp {
// Context whose event loop (ctdb->ev) schedules the retransmissions.
2691 struct ctdb_context *ctdb;
// Address that is announced.
2692 ctdb_sock_addr addr;
// Timed-event handler: sends one gratuitous ARP for arp->addr on arp->iface
// and re-schedules itself every CTDB_ARP_INTERVAL seconds until arp->count
// equals CTDB_ARP_REPEAT (the count increment and the cleanup are on lines
// not visible in this listing).
2698 send a control_gratuitous arp
2700 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2701 struct timeval t, void *private_data)
2704 struct control_gratious_arp *arp = talloc_get_type(private_data,
2705 struct control_gratious_arp);
2707 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2709 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2710 arp->iface, strerror(errno)));
// Stop condition for the repeat sequence.
2715 if (arp->count == CTDB_ARP_REPEAT) {
// The next timer is a talloc child of arp, so freeing arp cancels it.
2720 event_add_timed(arp->ctdb->ev, arp,
2721 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2722 send_gratious_arp, arp);
// Control handler: validates the request size, copies address and interface
// name into a new control_gratious_arp and schedules the first transmission
// immediately (timeval_zero).
2729 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2731 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2732 struct control_gratious_arp *arp;
// Step 1: the fixed part up to the iface member must be present.
2734 /* verify the size of indata */
2735 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2736 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2737 (unsigned)indata.dsize,
2738 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
// Step 2: the total size is compared with header + len (the comparison
// operator is on a line not visible in this listing).
2742 ( offsetof(struct ctdb_control_gratious_arp, iface)
2743 + gratious_arp->len ) ){
2745 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2746 "but should be %u bytes\n",
2747 (unsigned)indata.dsize,
2748 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2753 arp = talloc(ctdb, struct control_gratious_arp);
2754 CTDB_NO_MEMORY(ctdb, arp);
// NOTE(review): talloc_strdup reads up to the first NUL; no visible check
// ensures the iface bytes are NUL-terminated within len - confirm.
2757 arp->addr = gratious_arp->addr;
2758 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2759 CTDB_NO_MEMORY(ctdb, arp->iface);
2762 event_add_timed(arp->ctdb->ev, arp,
2763 timeval_zero(), send_gratious_arp, arp);
// Control handler: validates the size of a struct ctdb_control_ip_iface
// request and registers the address, mask and interface through
// ctdb_add_public_address.
2768 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2770 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
// Step 1: the fixed part up to the iface member must be present.
2773 /* verify the size of indata */
2774 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2775 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
// Step 2: the total size is compared with header + pub->len (parts of the
// condition are on lines not visible in this listing).
2779 ( offsetof(struct ctdb_control_ip_iface, iface)
2782 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2783 "but should be %u bytes\n",
2784 (unsigned)indata.dsize,
2785 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2789 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2792 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
// Completion callback for the release-ip event script. private_data is the
// temporary talloc context that ctdb_control_del_public_address passes as
// callback argument; it is freed here.
2800 called when releaseip event finishes for del_public_address
2802 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2805 talloc_free(private_data);
// Control handler: removes the public address named in the request from
// ctdb->vnn and, when the address has an interface assigned, starts the
// CTDB_EVENT_RELEASE_IP event script for it.
2808 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2810 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2811 struct ctdb_vnn *vnn;
// Same two-step size validation as ctdb_control_add_public_address.
2814 /* verify the size of indata */
2815 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2816 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2820 ( offsetof(struct ctdb_control_ip_iface, iface)
2823 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2824 "but should be %u bytes\n",
2825 (unsigned)indata.dsize,
2826 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2830 /* walk over all public addresses until we find a match */
2831 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2832 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2833 TALLOC_CTX *mem_ctx;
// The entry is unlinked while the list is being walked; this is safe only if
// the loop is left afterwards (the return is not visible in this listing).
2835 DLIST_REMOVE(ctdb->vnn, vnn);
// An address without an assigned interface needs no release event.
2836 if (vnn->iface == NULL) {
// mem_ctx is both the event's talloc context and the callback argument;
// delete_ip_callback frees it when the script finishes.
2841 mem_ctx = talloc_new(ctdb);
2842 ret = ctdb_event_script_callback(ctdb,
2843 mem_ctx, delete_ip_callback, mem_ctx,
2845 CTDB_EVENT_RELEASE_IP,
2847 ctdb_vnn_iface_string(vnn),
2848 ctdb_addr_to_str(&vnn->public_address),
2849 vnn->public_netmask_bits);
2850 ctdb_vnn_unassign_iface(ctdb, vnn);
2862 /* This function is called from the recovery daemon to verify that a remote
2863 node has the expected ip allocation.
2864 This is verified against ctdb->ip_tree
2866 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2868 struct ctdb_public_ip_list *tmp_ip;
2871 if (ctdb->ip_tree == NULL) {
2872 /* dont know the expected allocation yet, assume remote node
2881 for (i=0; i<ips->num; i++) {
2882 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2883 if (tmp_ip == NULL) {
2884 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2888 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2892 if (tmp_ip->pnn != ips->ips[i].pnn) {
2893 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2901 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2903 struct ctdb_public_ip_list *tmp_ip;
2905 if (ctdb->ip_tree == NULL) {
2906 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2910 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2911 if (tmp_ip == NULL) {
2912 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2916 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2917 tmp_ip->pnn = ip->pnn;