4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* timeout used for the takeover/release controls, built with
   timeval_current_ofs() from the takeover_timeout tunable.
   The expansion refers to a variable named 'ctdb', so it can only be
   used where such a variable is in scope. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* first argument of the timeval_current_ofs() delay used when
   ctdb_control_send_arp re-arms itself */
33 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp compares arp->count against this value */
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
/*
  return the name of the interface a vnn is currently assigned to,
  for use in log messages and as an event script argument

  NOTE(review): only the vnn->iface->name return is visible in this
  listing. Callers in this file log through this helper in situations
  where vnn->iface may be NULL, so a NULL guard with a fallback string
  presumably lives in the lines not shown - confirm.
*/
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
/*
  register an interface name in the ctdb->ifaces list

  The existing list is scanned with strcmp() on the name first
  (what happens on a match is not visible here; presumably an early
  return so that each name is stored only once - confirm).
  Otherwise a zeroed struct ctdb_iface is allocated as a talloc child
  of ctdb, the name is duplicated as a talloc child of the new entry,
  and the entry is linked in with DLIST_ADD.

  Allocation of the entry is checked with CTDB_NO_MEMORY_FATAL, the
  name copy with CTDB_NO_MEMORY.
*/
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
/*
  look up an interface in ctdb->ifaces by name

  The list is walked and each entry's name is compared with strcmp().
  NOTE(review): the return statements are not visible in this listing;
  callers test the result of ctdb_vnn_best_iface (which is built on
  this lookup) against NULL, so NULL is presumably returned when no
  entry matches - confirm.
*/
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Look for an existing entry with this interface name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
/*
  pick the preferred interface for a public address

  Iterates over the NULL-terminated vnn->ifaces name array, resolves
  each name with ctdb_find_iface and compares the candidates by their
  'references' counter: a candidate with fewer references than the
  current best is preferred.

  NOTE(review): the filtering between the lookup and the comparison
  (e.g. skipping unknown or unusable interfaces, seeding 'best') is not
  visible in this listing. Callers check the result against NULL.
*/
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
/*
  assign an interface to a public address (vnn)

  Visible behaviour:
   - one path only logs that the address is "still assigned" to its
     current interface (the condition is not visible; presumably
     vnn->iface is already set - confirm)
   - otherwise ctdb_vnn_best_iface is asked for an interface and an
     error is logged when none can be assigned
   - on success vnn->pnn is set to this node's pnn and the new
     assignment is logged together with a reference count

  NOTE(review): the error text "cannot assign to iface any iface"
  reads garbled; fixing it would change a runtime string so it is only
  flagged here.
*/
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
/*
  drop the interface assignment of a public address (vnn)

  Logs the old interface and its reference count (the log argument is
  NULL-safe: 0 is printed when vnn->iface is NULL), decrements the
  interface's reference counter and then compares vnn->pnn with this
  node's pnn (the action taken on a match is not visible here).

  NOTE(review): the decrement dereferences vnn->iface; since the log
  line above it allows for a NULL iface, a guard is presumably present
  in the lines not shown - confirm.
*/
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
/*
  check whether a public address can currently be hosted

  First looks at the interface already assigned to the vnn and whether
  its link is up, then walks the NULL-terminated vnn->ifaces name array
  resolving each candidate with ctdb_find_iface.
  NOTE(review): the per-candidate test and the return values are not
  visible in this listing.
*/
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/*
  state for announcing one public address with gratuitous arps and tcp
  tickles; handed to ctdb_control_send_arp as its private_data
  (not every member is visible in this listing)
*/
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
/* tcp connections to send tickle acks for */
201 struct ctdb_tcp_array *tcparray;
/* the public address being announced */
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
/* one element of a doubly linked list of tcp connections */
209 struct ctdb_tcp_list {
/* list linkage */
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
/* one element of ctdb->client_ip_list; removed from that list by
   ctdb_client_ip_destructor (remaining members not visible here) */
217 struct ctdb_client_ip {
/* list linkage */
218 struct ctdb_client_ip *prev, *next;
/* back pointer used by the destructor to reach the list head */
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
/*
  timed event handler that announces a public address

  private_data is the struct ctdb_takeover_arp set up by
  ctdb_announce_vnn_iface. Each run:
   - sends an arp for arp->addr on the vnn's current interface and
     logs at CRIT level if that fails
   - walks arp->tcparray and sends a tcp tickle for every recorded
     connection, logging failures
   - compares arp->count with CTDB_ARP_REPEAT (the action is not
     visible; presumably the sequence stops there - confirm)
   - re-arms itself on arp->vnn->takeover_ctx with a delay of
     timeval_current_ofs(CTDB_ARP_INTERVAL, 100000)

  NOTE(review): tcparray->num is dereferenced in the loop; a NULL check
  on tcparray is presumably in the lines not shown - confirm.
*/
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
/*
  start announcing a public address on its interface

  - creates vnn->takeover_ctx on demand as a talloc child of the vnn
  - allocates a zeroed struct ctdb_takeover_arp on that context and
    copies the public address into it
  - moves (talloc_steal) the vnn's tcp_array to the arp state, clears
    vnn->tcp_array and sets vnn->tcp_update_needed
  - schedules ctdb_control_send_arp with a zero timeout on
    vnn->takeover_ctx

  Both the arp state and the timer hang off vnn->takeover_ctx;
  ctdb_control_release_ip frees that context to stop previous arps.
*/
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
/* state carried from ctdb_control_release_ip to release_ip_callback */
314 struct takeover_callback_state {
/* the control request to reply to once the event script finished */
315 struct ctdb_req_control *c;
/* private copy of the address being released */
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
/* state carried from ctdb_do_takeip to ctdb_do_takeip_callback */
320 struct ctdb_do_takeip_state {
/* the control request to reply to once the event script finished */
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
/*
  completion callback for the takeip event script

  private_data is the struct ctdb_do_takeip_state from ctdb_do_takeip.
  Visible behaviour:
   - a status of -ETIME is tested separately (action not visible)
   - on failure the error is logged and the pending control is
     answered with the script's status
   - otherwise the address is announced via ctdb_announce_vnn_iface;
     if that fails the control is answered with -1
   - a CTDB_SRVID_TAKE_IP message carrying the address as a
     NUL-terminated string is sent to this node
   - finally the control is answered with 0
*/
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
/*
  start taking over a public address on this node

  - assigns an interface with ctdb_vnn_assign_iface and logs an error
    if that fails
  - allocates the callback state as a talloc child of the vnn
  - re-parents the control request 'c' to ctdb (talloc_steal) and keeps
    it in the state so ctdb_do_takeip_callback can reply later
  - runs the event script via ctdb_event_script_callback with the
    interface name, address and netmask bits as arguments, completion
    being reported to ctdb_do_takeip_callback
  - logs an error if starting the script fails

  NOTE(review): the log text "failed to assin a usable interface"
  contains a typo ("assign"); it is a runtime string so it is only
  flagged here.
*/
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
/* state carried from ctdb_do_updateip to ctdb_do_updateip_callback */
420 struct ctdb_do_updateip_state {
/* the control request to reply to once the event script finished */
421 struct ctdb_req_control *c;
/* interface the address was on before the move; restored by the
   callback when the move fails */
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
/*
  completion callback for the updateip event script

  private_data is the struct ctdb_do_updateip_state from
  ctdb_do_updateip. Visible behaviour:
   - a status of -ETIME is tested separately (action not visible)
   - on failure the error is logged, the new interface assignment is
     dropped, the vnn is pointed back at state->old with its reference
     count incremented, and the control is answered with the script's
     status
   - otherwise the address is announced via ctdb_announce_vnn_iface;
     if that fails the control is answered with -1
   - finally the control is answered with 0
*/
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
/*
  move a public address this node already holds to another interface

  - remembers the current interface in 'old', drops the assignment and
    asks ctdb_vnn_assign_iface for a new one
  - logs an error if no interface could be assigned, or if the newly
    chosen interface is the same as the old one
  - allocates the callback state as a talloc child of the vnn and
    re-parents the control request 'c' to ctdb so it survives until
    ctdb_do_updateip_callback replies
  - runs the CTDB_EVENT_UPDATE_IP event script; the visible arguments
    are the new interface name, the address and the netmask bits
  - logs an error if starting the script fails

  NOTE(review): old->name is dereferenced in the final error message;
  this relies on the vnn having had an interface on entry.
  NOTE(review): "assin" in two log strings is a typo for "assign";
  runtime strings are left untouched here.
*/
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 if (vnn->iface == old) {
494 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
495 "assin a same interface '%s'\n",
496 ctdb_addr_to_str(&vnn->public_address),
497 vnn->public_netmask_bits,
502 state = talloc(vnn, struct ctdb_do_updateip_state);
503 CTDB_NO_MEMORY(ctdb, state);
505 state->c = talloc_steal(ctdb, c);
509 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
510 "interface %s to %s\n",
511 ctdb_addr_to_str(&vnn->public_address),
512 vnn->public_netmask_bits,
514 ctdb_vnn_iface_string(vnn)));
516 ret = ctdb_event_script_callback(ctdb,
518 ctdb_do_updateip_callback,
521 CTDB_EVENT_UPDATE_IP,
524 ctdb_vnn_iface_string(vnn),
525 ctdb_addr_to_str(&vnn->public_address),
526 vnn->public_netmask_bits);
528 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
529 ctdb_addr_to_str(&vnn->public_address),
530 old->name, ctdb_vnn_iface_string(vnn)));
539 Find the vnn entry for a public ip address
540 returns NULL if the address is not known as a public address
/* walks the ctdb->vnn list and compares each entry's public_address
   with 'addr' using ctdb_same_ip; the result is a struct ctdb_vnn
   pointer (the return statements are not visible in this listing) */
542 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
544 struct ctdb_vnn *vnn;
546 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
547 if (ctdb_same_ip(&vnn->public_address, addr)) {
556 take over an ip address
/*
  control handler: this node has been told to host a public address

  indata.dptr is interpreted as a struct ctdb_public_ip. Visible flow:
   - the request is rejected with an error log when pip->pnn is not
     this node's pnn
   - the vnn is looked up; an unknown address is only logged at INFO
   - have_ip records whether the kernel already has the address
     (ctdb_sys_have_ip) and best_iface is the preferred interface;
     a NULL best_iface is logged as an error
   - several inconsistent states are logged: a newly created ip
     (no iface, pnn -1, but known to the kernel), an address known to
     the kernel without an assigned interface, and an address held
     locally although it is assigned to another node
   - when the current interface's link is up the address is only moved
     if that gains something (current references greater than
     best_iface references + 1); otherwise a differing best_iface is
     considered
   - depending on the do_takeip / do_updateip flags the work is handed
     to ctdb_do_takeip or ctdb_do_updateip; otherwise the takeover is
     logged as redundant

  NOTE(review): no check of indata.dsize against
  sizeof(struct ctdb_public_ip) is visible before pip is dereferenced;
  confirm that the caller validates the size.
  NOTE(review): two log messages lose a space where string literals are
  joined: "failed to find" "a usable interface" and "node %d"
  "and we are node %d". They are runtime strings and left unchanged.
  NOTE(review): vnn->iface->link_up is dereferenced; the guard for a
  NULL vnn->iface is not visible in this listing.
*/
558 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
559 struct ctdb_req_control *c,
564 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
565 struct ctdb_vnn *vnn;
566 bool have_ip = false;
567 bool do_updateip = false;
568 bool do_takeip = false;
569 struct ctdb_iface *best_iface = NULL;
571 if (pip->pnn != ctdb->pnn) {
572 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
573 "with pnn %d, but we're node %d\n",
574 ctdb_addr_to_str(&pip->addr),
575 pip->pnn, ctdb->pnn));
579 /* look up the vnn for this public address */
580 vnn = find_public_ip_vnn(ctdb, &pip->addr);
582 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
583 ctdb_addr_to_str(&pip->addr)));
587 have_ip = ctdb_sys_have_ip(&pip->addr);
588 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
589 if (best_iface == NULL) {
590 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
591 "a usable interface (old %s, have_ip %d)\n",
592 ctdb_addr_to_str(&vnn->public_address),
593 vnn->public_netmask_bits,
594 ctdb_vnn_iface_string(vnn),
599 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
600 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
604 if (vnn->iface == NULL && have_ip) {
605 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606 "but we have no interface assigned, has someone manually configured it?"
608 ctdb_addr_to_str(&vnn->public_address)));
613 if (vnn->pnn != ctdb->pnn && have_ip) {
614 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615 "and we have it on iface[%s], but it was assigned to node %d"
616 "and we are node %d, banning ourself\n",
617 ctdb_addr_to_str(&vnn->public_address),
618 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
624 if (vnn->iface->link_up) {
625 /* only move when the rebalance gains something */
626 if (vnn->iface->references > (best_iface->references + 1)) {
629 } else if (vnn->iface != best_iface) {
636 ctdb_vnn_unassign_iface(ctdb, vnn);
643 ret = ctdb_do_takeip(ctdb, c, vnn);
647 } else if (do_updateip) {
648 ret = ctdb_do_updateip(ctdb, c, vnn);
654 * The interface is up and the kernel known the ip
657 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658 ctdb_addr_to_str(&pip->addr),
659 vnn->public_netmask_bits,
660 ctdb_vnn_iface_string(vnn)));
664 /* tell ctdb_control.c that we will be replying asynchronously */
671 takeover an ip address old v4 style
/*
  compatibility wrapper for the old ipv4-only takeover control

  Allocates a zeroed struct ctdb_public_ip as a talloc child of the
  request 'c', copies the incoming payload over the start of it and
  forwards the result to ctdb_control_takeover_ip.

  NOTE(review): the memcpy length is indata.dsize while the destination
  is sizeof(struct ctdb_public_ip) bytes. No bound check is visible in
  this listing; if indata.dsize can exceed the destination size this
  overflows the buffer - confirm that the size is validated before the
  control reaches this function, or clamp it here.
*/
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
674 struct ctdb_req_control *c,
680 data.dsize = sizeof(struct ctdb_public_ip);
681 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682 CTDB_NO_MEMORY(ctdb, data.dptr);
684 memcpy(data.dptr, indata.dptr, indata.dsize);
685 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
689 kill any clients that are registered with an IP that is being released
/*
  Walks ctdb->client_ip_list; for every entry whose address matches
  'addr' (ctdb_same_ip) the owning client is looked up with
  ctdb_reqid_find and, when its pid is non-zero, sent SIGKILL.

  NOTE(review): tmp_addr is compared against 'addr' but its assignment
  is not visible in this listing (presumably a copy of ip->addr).
  NOTE(review): client->pid is dereferenced; whether a failed
  ctdb_reqid_find lookup is handled is not visible here - confirm.
*/
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
693 struct ctdb_client_ip *ip;
695 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696 ctdb_addr_to_str(addr)));
698 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699 ctdb_sock_addr tmp_addr;
702 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
704 ctdb_addr_to_str(&ip->addr)));
706 if (ctdb_same_ip(&tmp_addr, addr)) {
707 struct ctdb_client *client = ctdb_reqid_find(ctdb,
710 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
712 ctdb_addr_to_str(&ip->addr),
715 if (client->pid != 0) {
716 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717 (unsigned)client->pid,
718 ctdb_addr_to_str(addr),
720 kill(client->pid, SIGKILL);
727 called when releaseip event finishes
/*
  completion callback for the releaseip event script

  private_data is the struct takeover_callback_state set up by
  ctdb_control_release_ip. Visible behaviour:
   - a status of -ETIME is tested separately (action not visible)
   - the address is formatted into a string owned by 'state' and sent
     to this node as a CTDB_SRVID_RELEASE_IP message (length includes
     the terminating NUL)
   - clients registered with the address are killed
   - the vnn's interface assignment is dropped
   - the pending control is answered with 0
*/
729 static void release_ip_callback(struct ctdb_context *ctdb, int status,
732 struct takeover_callback_state *state =
733 talloc_get_type(private_data, struct takeover_callback_state);
736 if (status == -ETIME) {
740 /* send a message to all clients of this node telling them
741 that the cluster has been reconfigured and they should
742 release any sockets on this IP */
743 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745 data.dsize = strlen((char *)data.dptr)+1;
747 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
749 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
751 /* kill clients that have registered with this IP */
752 release_kill_clients(ctdb, state->addr);
754 ctdb_vnn_unassign_iface(ctdb, state->vnn);
756 /* the control succeeded */
757 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
762 release an ip address
/*
  control handler: this node has been told to give up a public address

  indata.dptr is interpreted as a struct ctdb_public_ip. Visible flow:
   - the vnn is looked up; an unknown address is only logged at INFO
   - vnn->takeover_ctx is freed, which stops any arp announcements
     still scheduled on it
   - if the kernel does not hold the address the release is logged as
     redundant and only the interface assignment is dropped
   - an address known to the kernel without an assigned interface is
     logged at CRIT level
   - otherwise a callback state is allocated on ctdb, the control
     request 'c' is re-parented onto that state, the address is copied
     into it and the CTDB_EVENT_RELEASE_IP event script is started
     with release_ip_callback as completion handler
   - a failure to start the script is logged

  NOTE(review): no check of indata.dsize against
  sizeof(struct ctdb_public_ip) is visible before pip is dereferenced.
*/
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
765 struct ctdb_req_control *c,
770 struct takeover_callback_state *state;
771 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772 struct ctdb_vnn *vnn;
774 /* look up the vnn for this public address */
775 vnn = find_public_ip_vnn(ctdb, &pip->addr);
777 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778 ctdb_addr_to_str(&pip->addr)));
783 /* stop any previous arps */
784 talloc_free(vnn->takeover_ctx);
785 vnn->takeover_ctx = NULL;
787 if (!ctdb_sys_have_ip(&pip->addr)) {
788 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
789 ctdb_addr_to_str(&pip->addr),
790 vnn->public_netmask_bits,
791 ctdb_vnn_iface_string(vnn)));
792 ctdb_vnn_unassign_iface(ctdb, vnn);
796 if (vnn->iface == NULL) {
797 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798 "but we have no interface assigned, has someone manually configured it?"
800 ctdb_addr_to_str(&vnn->public_address)));
805 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn),
811 state = talloc(ctdb, struct takeover_callback_state);
812 CTDB_NO_MEMORY(ctdb, state);
814 state->c = talloc_steal(state, c);
815 state->addr = talloc(state, ctdb_sock_addr);
816 CTDB_NO_MEMORY(ctdb, state->addr);
817 *state->addr = pip->addr;
820 ret = ctdb_event_script_callback(ctdb,
821 state, release_ip_callback, state,
823 CTDB_EVENT_RELEASE_IP,
825 ctdb_vnn_iface_string(vnn),
826 ctdb_addr_to_str(&pip->addr),
827 vnn->public_netmask_bits);
829 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830 ctdb_addr_to_str(&pip->addr),
831 ctdb_vnn_iface_string(vnn)));
836 /* tell the control that we will be replying asynchronously */
842 release an ip address old v4 style
/*
  compatibility wrapper for the old ipv4-only release control

  Allocates a zeroed struct ctdb_public_ip as a talloc child of the
  request 'c', copies the incoming payload over the start of it and
  forwards the result to ctdb_control_release_ip.

  NOTE(review): the memcpy length is indata.dsize while the destination
  is sizeof(struct ctdb_public_ip) bytes. No bound check is visible in
  this listing; if indata.dsize can exceed the destination size this
  overflows the buffer - confirm that the size is validated before the
  control reaches this function, or clamp it here.
*/
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
845 struct ctdb_req_control *c,
851 data.dsize = sizeof(struct ctdb_public_ip);
852 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853 CTDB_NO_MEMORY(ctdb, data.dptr);
855 memcpy(data.dptr, indata.dptr, indata.dsize);
856 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  add one public address, with its candidate interfaces, to ctdb->vnn

  'ifaces' is a comma separated list of interface names.
   - an address already present in ctdb->vnn (ctdb_same_sockaddr) is
     reported at CRIT level as a duplicate
   - a zeroed struct ctdb_vnn is allocated on ctdb
   - a private copy of 'ifaces' is split with strtok(); each name is
     duplicated onto the vnn and appended to vnn->ifaces, which is
     grown with talloc_realloc and finally NULL-terminated
   - address and netmask bits are stored
   - every interface name is registered with ctdb_add_local_iface;
     a failure is logged at CRIT level
   - the vnn is linked into ctdb->vnn with DLIST_ADD

  NOTE(review): the initial talloc_array result is not checked on a
  visible line (the talloc_realloc result inside the loop is).
  NOTE(review): strtok() keeps hidden static state and is also used by
  ctdb_set_public_addresses, which calls this function from inside its
  own tokenising of a line; the order of those calls matters - confirm
  the caller has finished with its strtok() sequence first.
*/
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861 ctdb_sock_addr *addr,
862 unsigned mask, const char *ifaces)
864 struct ctdb_vnn *vnn;
871 /* Verify that we dont have an entry for this ip yet */
872 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
875 ctdb_addr_to_str(addr)));
880 /* create a new vnn structure for this ip address */
881 vnn = talloc_zero(ctdb, struct ctdb_vnn);
882 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884 tmp = talloc_strdup(vnn, ifaces);
885 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
894 vnn->ifaces[num] = NULL;
895 vnn->public_address = *addr;
896 vnn->public_netmask_bits = mask;
899 for (i=0; vnn->ifaces[i]; i++) {
900 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
902 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903 "for public_address[%s]\n",
904 vnn->ifaces[i], ctdb_addr_to_str(addr)));
910 DLIST_ADD(ctdb->vnn, vnn);
916 setup the event script directory
/* stores a talloc copy of script_dir (owned by ctdb) in
   ctdb->event_script_dir; the allocation is checked with
   CTDB_NO_MEMORY */
918 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
920 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
921 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
926 setup the public address lists from a file
/*
  'alist' is the path of the public address file.
   - the file is loaded with file_lines_load; failure sets the ctdb
     error string
   - trailing empty lines are ignored
   - for each line leading blanks and tabs are skipped and empty lines
     are tested for
   - the line is tokenised on blanks/tabs with strtok(): the visible
     code reads two tokens (presumably the address/mask string and the
     interface list - the assignments are not visible)
   - without an interface token ctdb->default_public_interface is
     used; if that is not set either, the line is reported at CRIT
     level
   - the address is parsed with parse_ip_mask and added with
     ctdb_add_public_address; failures are reported at CRIT level with
     the 1-based line number
*/
928 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
934 lines = file_lines_load(alist, &nlines, ctdb);
936 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
939 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
943 for (i=0;i<nlines;i++) {
951 while ((*line == ' ') || (*line == '\t')) {
957 if (strcmp(line, "") == 0) {
960 tok = strtok(line, " \t");
962 tok = strtok(NULL, " \t");
964 if (NULL == ctdb->default_public_interface) {
965 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
970 ifaces = ctdb->default_public_interface;
975 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
976 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
980 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
981 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
  configure the single public ip of this node

  - allocates a zeroed struct ctdb_vnn on ctdb
  - builds a two element, NULL-terminated ifaces array holding a copy
    of 'iface'
  - parses 'ip' into svnn->public_address with parse_ip
  - registers the interface with ctdb_add_local_iface (failure is
    logged at CRIT level) and assigns it with ctdb_vnn_assign_iface
  - stores the vnn in ctdb->single_ip_vnn

  NOTE(review): what happens to svnn on the failure paths (freed or
  left attached to ctdb) is not visible in this listing.
*/
991 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
995 struct ctdb_vnn *svnn;
999 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1000 CTDB_NO_MEMORY(ctdb, svnn);
1002 svnn->ifaces = talloc_array(svnn, const char *, 2);
1003 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1004 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1005 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1006 svnn->ifaces[1] = NULL;
1008 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1014 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1016 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1017 "for single_ip[%s]\n",
1019 ctdb_addr_to_str(&svnn->public_address)));
1024 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1030 ctdb->single_ip_vnn = svnn;
/* one public address in the merged, cluster-wide list built by
   create_merged_ip_list (singly linked; a pnn member is used by the
   code in this file but is not visible in this listing) */
1034 struct ctdb_public_ip_list {
1035 struct ctdb_public_ip_list *next;
1037 ctdb_sock_addr addr;
1041 /* Given a physical node, return the number of
1042 public addresses that are currently assigned to this node.
/* walks the 'ips' list and tests each entry's pnn against 'pnn'
   (the counter update and the return are not visible in this listing) */
1044 static int node_ip_coverage(struct ctdb_context *ctdb,
1046 struct ctdb_public_ip_list *ips)
1050 for (;ips;ips=ips->next) {
1051 if (ips->pnn == pnn) {
1059 /* Check if this is a public ip known to the node, i.e. can that
1060 node takeover this ip ?
/*
  Looks for ip->addr in ctdb->nodes[pnn]->available_public_ips using
  ctdb_same_ip; a NULL list is handled separately.

  Return convention: despite the name this is not a boolean. Callers
  in this file treat a non-zero result as "this node can NOT serve the
  ip" (see the != 0 test in ctdb_takeover_run), so 0 presumably means
  the address was found - confirm against the return lines, which are
  not visible in this listing.

  NOTE(review): ctdb->nodes is indexed with pnn without a visible
  bounds check.
*/
1062 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1063 struct ctdb_public_ip_list *ip)
1065 struct ctdb_all_public_ips *public_ips;
1068 public_ips = ctdb->nodes[pnn]->available_public_ips;
1070 if (public_ips == NULL) {
1074 for (i=0;i<public_ips->num;i++) {
1075 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1076 /* yes, this node can serve this public ip */
1085 /* search the node list for a node to take over this ip.
1086 pick the node that is currently serving the fewest ips
1087 so that the ips get spread out evenly.
/*
  Iterates over all nodes in 'nodemap'. Nodes with any flag in 'mask'
  set are skipped, as are nodes for which can_node_serve_ip returns
  non-zero (non-zero means "cannot serve", see its callers). For the
  remaining nodes node_ip_coverage gives the number of addresses
  already assigned in 'all_ips'; the bookkeeping of the minimum is not
  visible in this listing. A warning is logged when no node was found.

  ctdb_takeover_run treats a non-zero return value as failure.
*/
1089 static int find_takeover_node(struct ctdb_context *ctdb,
1090 struct ctdb_node_map *nodemap, uint32_t mask,
1091 struct ctdb_public_ip_list *ip,
1092 struct ctdb_public_ip_list *all_ips)
1094 int pnn, min=0, num;
1098 for (i=0;i<nodemap->num;i++) {
1099 if (nodemap->nodes[i].flags & mask) {
1100 /* This node is not healty and can not be used to serve
1106 /* verify that this node can serve this ip */
1107 if (can_node_serve_ip(ctdb, i, ip)) {
1108 /* no it couldnt so skip to the next node */
1112 num = node_ip_coverage(ctdb, i, all_ips);
1113 /* was this the first node we checked ? */
1125 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1126 ctdb_addr_to_str(&ip->addr)));
/*
  build the IP_KEYLEN-word key used for the ip tree

  The key lives in a function-local static array that is zeroed on
  every call. One address family stores the single 32 bit address in
  key[3]; the other stores the four 32 bit words of the ipv6 address
  in key[0..3]. Any other family is logged as an error.

  NOTE(review): the return statement is not visible; given the return
  type, a pointer to the static array is presumably returned. If so,
  the result is overwritten by the next call and the function is not
  reentrant - callers must consume the key before calling again.
*/
1136 static uint32_t *ip_key(ctdb_sock_addr *ip)
1138 static uint32_t key[IP_KEYLEN];
1140 bzero(key, sizeof(key));
1142 switch (ip->sa.sa_family) {
1144 key[3] = htonl(ip->ip.sin_addr.s_addr);
1147 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1148 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1149 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1150 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1153 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/* NOTE(review): the body is not visible in this listing; only the
   signature (two opaque pointers in, one opaque pointer out) is known */
1160 static void *add_ip_callback(void *parm, void *data)
/*
  traverse callback used by create_merged_ip_list

  'param' is the address of the list head and 'data' the tree element;
  the element's next pointer is set to the current head (the store of
  the new head is not visible in this listing).

  NOTE(review): unlike its neighbours this function is not declared
  static.
*/
1165 void getips_count_callback(void *param, void *data)
1167 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1168 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1170 new_ip->next = *ip_list;
/*
  build the cluster-wide list of public addresses

  - any previous ctdb->ip_tree is freed and a new tree is created
  - for every node that is not flagged NODE_FLAGS_DELETED and has a
    known_public_ips list, each address is copied into a new
    struct ctdb_public_ip_list allocated on the tree and inserted
    under the key produced by ip_key
  - the tree is then traversed with getips_count_callback to chain the
    elements into a list

  The list elements are talloc children of ctdb->ip_tree, so they stay
  valid only until the tree is freed (i.e. until the next call).

  NOTE(review): the initialisation of ip_list before the traverse is
  not visible in this listing - confirm it starts out as NULL.
*/
1174 static struct ctdb_public_ip_list *
1175 create_merged_ip_list(struct ctdb_context *ctdb)
1178 struct ctdb_public_ip_list *ip_list;
1179 struct ctdb_all_public_ips *public_ips;
1181 if (ctdb->ip_tree != NULL) {
1182 talloc_free(ctdb->ip_tree);
1183 ctdb->ip_tree = NULL;
1185 ctdb->ip_tree = trbt_create(ctdb, 0);
1187 for (i=0;i<ctdb->num_nodes;i++) {
1188 public_ips = ctdb->nodes[i]->known_public_ips;
1190 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1194 /* there were no public ips for this node */
1195 if (public_ips == NULL) {
1199 for (j=0;j<public_ips->num;j++) {
1200 struct ctdb_public_ip_list *tmp_ip;
1202 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1203 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1204 tmp_ip->pnn = public_ips->ips[j].pnn;
1205 tmp_ip->addr = public_ips->ips[j].addr;
1206 tmp_ip->next = NULL;
1208 trbt_insertarray32_callback(ctdb->ip_tree,
1209 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1216 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1222 make any IP alias changes for public addresses that are necessary
/*
  recompute which node hosts which public address and push the result
  to the cluster

  Phases visible in this listing:
   1. count nodes that are neither INACTIVE nor DISABLED; if there are
      any, nodes with either flag are excluded (mask), otherwise only
      INACTIVE nodes are excluded so that disabled nodes act as a
      fallback
   2. build the merged address list with create_merged_ip_list
   3. with the deterministic_public_ips tunable set to 1, every address
      is first assigned round-robin (index modulo nodemap->num)
   4. addresses whose node is masked out, or whose node cannot serve
      them (can_node_serve_ip != 0), are checked so they can be marked
      unassigned
   5. find_takeover_node is called for every address with pnn -1
   6. the no_ip_failback tunable is checked; combining it with
      deterministic_public_ips is reported as an error
   7. a rebalancing pass compares the highest and lowest coverage among
      the nodes able to serve each address and, when they differ by
      more than one, un-assigns an address of the busiest node
   8. release controls are sent to every connected node for each
      address it should not hold (ipv4 addresses use the legacy
      CTDB_CONTROL_RELEASE_IPv4 payload), then awaited
   9. takeover controls are sent to the owner of every assigned address
      (again with a legacy ipv4 variant), then awaited
  10. the "ipreallocated" event is run on all connected nodes through
      CTDB_CONTROL_RUN_EVENTSCRIPTS

  All temporary allocations hang off tmp_ctx, which is freed on the
  visible error paths and at the end.

  NOTE(review): step 3 divides by nodemap->num and several places index
  nodemap->nodes[] with a pnn taken from the address list; no guard for
  an empty nodemap or an out-of-range pnn is visible here.
  NOTE(review): the error text for step 10 still says "updatenatgw"
  although the event that is run is "ipreallocated".
*/
1224 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1226 int i, num_healthy, retries;
1227 struct ctdb_public_ip ip;
1228 struct ctdb_public_ipv4 ipv4;
1229 uint32_t mask, *nodes;
1230 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1231 int maxnode, maxnum=0, minnode, minnum=0, num;
1233 struct timeval timeout;
1234 struct client_async_data *async_data;
1235 struct ctdb_client_control_state *state;
1236 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1241 /* Count how many completely healthy nodes we have */
1243 for (i=0;i<nodemap->num;i++) {
1244 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1249 if (num_healthy > 0) {
1250 /* We have healthy nodes, so only consider them for
1251 serving public addresses
1253 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1255 /* We didnt have any completely healthy nodes so
1256 use "disabled" nodes as a fallback
1258 mask = NODE_FLAGS_INACTIVE;
1261 /* since nodes only know about those public addresses that
1262 can be served by that particular node, no single node has
1263 a full list of all public addresses that exist in the cluster.
1264 Walk over all node structures and create a merged list of
1265 all public addresses that exist in the cluster.
1267 keep the tree of ips around as ctdb->ip_tree
1269 all_ips = create_merged_ip_list(ctdb);
1271 /* If we want deterministic ip allocations, i.e. that the ip addresses
1272 will always be allocated the same way for a specific set of
1273 available/unavailable nodes.
1275 if (1 == ctdb->tunable.deterministic_public_ips) {
1276 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1277 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1278 tmp_ip->pnn = i%nodemap->num;
1283 /* mark all public addresses with a masked node as being served by
1286 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1287 if (tmp_ip->pnn == -1) {
1290 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1295 /* verify that the assigned nodes can serve that public ip
1296 and set it to -1 if not
1298 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1299 if (tmp_ip->pnn == -1) {
1302 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1303 /* this node can not serve this ip. */
1309 /* now we must redistribute all public addresses with takeover node
1310 -1 among the nodes available
1314 /* loop over all ip's and find a physical node to cover for
1317 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1318 if (tmp_ip->pnn == -1) {
1319 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1320 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1321 ctdb_addr_to_str(&tmp_ip->addr)));
1326 /* If we dont want ips to fail back after a node becomes healthy
1327 again, we wont even try to reallocat the ip addresses so that
1328 they are evenly spread out.
1329 This can NOT be used at the same time as DeterministicIPs !
1331 if (1 == ctdb->tunable.no_ip_failback) {
1332 if (1 == ctdb->tunable.deterministic_public_ips) {
1333 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1339 /* now, try to make sure the ip adresses are evenly distributed
1341 for each ip address, loop over all nodes that can serve this
1342 ip and make sure that the difference between the node
1343 serving the most and the node serving the least ip's are not greater
1346 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1347 if (tmp_ip->pnn == -1) {
1351 /* Get the highest and lowest number of ips's served by any
1352 valid node which can serve this ip.
1356 for (i=0;i<nodemap->num;i++) {
1357 if (nodemap->nodes[i].flags & mask) {
1361 /* only check nodes that can actually serve this ip */
1362 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1363 /* no it couldnt so skip to the next node */
1367 num = node_ip_coverage(ctdb, i, all_ips);
1368 if (maxnode == -1) {
1377 if (minnode == -1) {
1387 if (maxnode == -1) {
1388 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1389 ctdb_addr_to_str(&tmp_ip->addr)));
1394 /* If we want deterministic IPs then dont try to reallocate
1395 them to spread out the load.
1397 if (1 == ctdb->tunable.deterministic_public_ips) {
1401 /* if the spread between the smallest and largest coverage by
1402 a node is >=2 we steal one of the ips from the node with
1403 most coverage to even things out a bit.
1404 try to do this at most 5 times since we dont want to spend
1405 too much time balancing the ip coverage.
1407 if ( (maxnum > minnum+1)
1409 struct ctdb_public_ip_list *tmp;
1411 /* mark one of maxnode's vnn's as unassigned and try
1414 for (tmp=all_ips;tmp;tmp=tmp->next) {
1415 if (tmp->pnn == maxnode) {
1425 /* finished distributing the public addresses, now just send the
1426 info out to the nodes
1430 /* at this point ->pnn is the node which will own each IP
1431 or -1 if there is no node that can cover this ip
1434 /* now tell all nodes to delete any alias that they should not
1435 have. This will be a NOOP on nodes that don't currently
1436 hold the given alias */
1437 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1438 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1440 for (i=0;i<nodemap->num;i++) {
1441 /* don't talk to unconnected nodes, but do talk to banned nodes */
1442 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1446 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1447 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1448 /* This node should be serving this
1449 vnn so dont tell it to release the ip
1453 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1454 ipv4.pnn = tmp_ip->pnn;
1455 ipv4.sin = tmp_ip->addr.ip;
1457 timeout = TAKEOVER_TIMEOUT();
1458 data.dsize = sizeof(ipv4);
1459 data.dptr = (uint8_t *)&ipv4;
1460 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1461 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1465 ip.pnn = tmp_ip->pnn;
1466 ip.addr = tmp_ip->addr;
1468 timeout = TAKEOVER_TIMEOUT();
1469 data.dsize = sizeof(ip);
1470 data.dptr = (uint8_t *)&ip;
1471 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1472 0, CTDB_CONTROL_RELEASE_IP, 0,
1477 if (state == NULL) {
1478 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1479 talloc_free(tmp_ctx);
1483 ctdb_client_async_add(async_data, state);
1486 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1487 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1488 talloc_free(tmp_ctx);
1491 talloc_free(async_data);
1494 /* tell all nodes to get their own IPs */
1495 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1496 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1497 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1498 if (tmp_ip->pnn == -1) {
1499 /* this IP won't be taken over */
1503 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1504 ipv4.pnn = tmp_ip->pnn;
1505 ipv4.sin = tmp_ip->addr.ip;
1507 timeout = TAKEOVER_TIMEOUT();
1508 data.dsize = sizeof(ipv4);
1509 data.dptr = (uint8_t *)&ipv4;
1510 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1511 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1515 ip.pnn = tmp_ip->pnn;
1516 ip.addr = tmp_ip->addr;
1518 timeout = TAKEOVER_TIMEOUT();
1519 data.dsize = sizeof(ip);
1520 data.dptr = (uint8_t *)&ip;
1521 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1522 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1526 if (state == NULL) {
1527 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1528 talloc_free(tmp_ctx);
1532 ctdb_client_async_add(async_data, state);
1534 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1535 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1536 talloc_free(tmp_ctx);
1540 /* tell all nodes to update natwg */
1541 /* send the flags update natgw on all connected nodes */
1542 data.dptr = discard_const("ipreallocated");
1543 data.dsize = strlen((char *)data.dptr) + 1;
1544 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1545 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1546 nodes, 0, TAKEOVER_TIMEOUT(),
1550 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1553 talloc_free(tmp_ctx);
1559 destroy a ctdb_client_ip structure
1561 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1563 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1564 ctdb_addr_to_str(&ip->addr),
1565 ntohs(ip->addr.ip.sin_port),
1568 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1573 called by a client to inform us of a TCP connection that it is managing
1574 that should tickled with an ACK when IP takeover is done
1575 we handle both the old ipv4 style of packets as well as the new ipv4/6
1578 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1581 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1582 struct ctdb_control_tcp *old_addr = NULL;
1583 struct ctdb_control_tcp_addr new_addr;
1584 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1585 struct ctdb_tcp_list *tcp;
1586 struct ctdb_tcp_connection t;
1589 struct ctdb_client_ip *ip;
1590 struct ctdb_vnn *vnn;
1591 ctdb_sock_addr addr;
1593 switch (indata.dsize) {
1594 case sizeof(struct ctdb_control_tcp):
1595 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1596 ZERO_STRUCT(new_addr);
1597 tcp_sock = &new_addr;
1598 tcp_sock->src.ip = old_addr->src;
1599 tcp_sock->dest.ip = old_addr->dest;
1601 case sizeof(struct ctdb_control_tcp_addr):
1602 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1605 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1606 "to ctdb_control_tcp_client. size was %d but "
1607 "only allowed sizes are %lu and %lu\n",
1609 (long unsigned)sizeof(struct ctdb_control_tcp),
1610 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1614 addr = tcp_sock->src;
1615 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1616 addr = tcp_sock->dest;
1617 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1620 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1621 vnn = find_public_ip_vnn(ctdb, &addr);
1623 switch (addr.sa.sa_family) {
1625 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1626 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1627 ctdb_addr_to_str(&addr)));
1631 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1632 ctdb_addr_to_str(&addr)));
1635 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1641 if (vnn->pnn != ctdb->pnn) {
1642 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1643 ctdb_addr_to_str(&addr),
1644 client_id, client->pid));
1645 /* failing this call will tell smbd to die */
1649 ip = talloc(client, struct ctdb_client_ip);
1650 CTDB_NO_MEMORY(ctdb, ip);
1654 ip->client_id = client_id;
1655 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1656 DLIST_ADD(ctdb->client_ip_list, ip);
1658 tcp = talloc(client, struct ctdb_tcp_list);
1659 CTDB_NO_MEMORY(ctdb, tcp);
1661 tcp->connection.src_addr = tcp_sock->src;
1662 tcp->connection.dst_addr = tcp_sock->dest;
1664 DLIST_ADD(client->tcp_list, tcp);
1666 t.src_addr = tcp_sock->src;
1667 t.dst_addr = tcp_sock->dest;
1669 data.dptr = (uint8_t *)&t;
1670 data.dsize = sizeof(t);
1672 switch (addr.sa.sa_family) {
1674 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1675 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1676 ctdb_addr_to_str(&tcp_sock->src),
1677 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1680 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1681 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1682 ctdb_addr_to_str(&tcp_sock->src),
1683 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1686 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1690 /* tell all nodes about this tcp connection */
1691 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1692 CTDB_CONTROL_TCP_ADD,
1693 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1695 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1703 find a tcp address on a list
1705 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1706 struct ctdb_tcp_connection *tcp)
1710 if (array == NULL) {
1714 for (i=0;i<array->num;i++) {
1715 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1716 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1717 return &array->connections[i];
1726 called by a daemon to inform us of a TCP connection that one of its
1727 clients managing that should tickled with an ACK when IP takeover is
1730 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1732 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1733 struct ctdb_tcp_array *tcparray;
1734 struct ctdb_tcp_connection tcp;
1735 struct ctdb_vnn *vnn;
1737 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1739 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1740 ctdb_addr_to_str(&p->dst_addr)));
1746 tcparray = vnn->tcp_array;
1748 /* If this is the first tickle */
1749 if (tcparray == NULL) {
1750 tcparray = talloc_size(ctdb->nodes,
1751 offsetof(struct ctdb_tcp_array, connections) +
1752 sizeof(struct ctdb_tcp_connection) * 1);
1753 CTDB_NO_MEMORY(ctdb, tcparray);
1754 vnn->tcp_array = tcparray;
1757 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1758 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1760 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1761 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1764 if (tcp_update_needed) {
1765 vnn->tcp_update_needed = true;
1771 /* Do we already have this tickle ?*/
1772 tcp.src_addr = p->src_addr;
1773 tcp.dst_addr = p->dst_addr;
1774 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1775 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1776 ctdb_addr_to_str(&tcp.dst_addr),
1777 ntohs(tcp.dst_addr.ip.sin_port),
1782 /* A new tickle, we must add it to the array */
1783 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1784 struct ctdb_tcp_connection,
1786 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1788 vnn->tcp_array = tcparray;
1789 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1790 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1793 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1794 ctdb_addr_to_str(&tcp.dst_addr),
1795 ntohs(tcp.dst_addr.ip.sin_port),
1798 if (tcp_update_needed) {
1799 vnn->tcp_update_needed = true;
1807 called by a daemon to inform us of a TCP connection that one of its
1808 clients managing that should tickled with an ACK when IP takeover is
1811 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1813 struct ctdb_tcp_connection *tcpp;
1814 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1817 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1818 ctdb_addr_to_str(&conn->dst_addr)));
1822 /* if the array is empty we cant remove it
1823 and we dont need to do anything
1825 if (vnn->tcp_array == NULL) {
1826 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1827 ctdb_addr_to_str(&conn->dst_addr),
1828 ntohs(conn->dst_addr.ip.sin_port)));
1833 /* See if we know this connection
1834 if we dont know this connection then we dont need to do anything
1836 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1838 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1839 ctdb_addr_to_str(&conn->dst_addr),
1840 ntohs(conn->dst_addr.ip.sin_port)));
1845 /* We need to remove this entry from the array.
1846 Instead of allocating a new array and copying data to it
1847 we cheat and just copy the last entry in the existing array
1848 to the entry that is to be removed and just shring the
1851 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1852 vnn->tcp_array->num--;
1854 /* If we deleted the last entry we also need to remove the entire array
1856 if (vnn->tcp_array->num == 0) {
1857 talloc_free(vnn->tcp_array);
1858 vnn->tcp_array = NULL;
1861 vnn->tcp_update_needed = true;
1863 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1864 ctdb_addr_to_str(&conn->src_addr),
1865 ntohs(conn->src_addr.ip.sin_port)));
1870 called by a daemon to inform us of a TCP connection that one of its
1871 clients used are no longer needed in the tickle database
1873 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1875 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1877 ctdb_remove_tcp_connection(ctdb, conn);
/*
  called when a daemon restarts. The intention is to send all tickles
  for the public addresses we serve to the new node straight away;
  that is not implemented yet, so this only reports success.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	/*XXX here we should send all tickes we are serving to the new node */

	return 0;
}
1895 called when a client structure goes away - hook to remove
1896 elements from the tcp_list in all daemons
1898 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1900 while (client->tcp_list) {
1901 struct ctdb_tcp_list *tcp = client->tcp_list;
1902 DLIST_REMOVE(client->tcp_list, tcp);
1903 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1909 release all IPs on shutdown
1911 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1913 struct ctdb_vnn *vnn;
1915 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1916 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1917 ctdb_vnn_unassign_iface(ctdb, vnn);
1923 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1924 ctdb_vnn_iface_string(vnn),
1925 ctdb_addr_to_str(&vnn->public_address),
1926 vnn->public_netmask_bits);
1927 release_kill_clients(ctdb, &vnn->public_address);
1928 ctdb_vnn_unassign_iface(ctdb, vnn);
1934 get list of public IPs
1936 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1937 struct ctdb_req_control *c, TDB_DATA *outdata)
1940 struct ctdb_all_public_ips *ips;
1941 struct ctdb_vnn *vnn;
1942 bool only_available = false;
1944 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1945 only_available = true;
1948 /* count how many public ip structures we have */
1950 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1954 len = offsetof(struct ctdb_all_public_ips, ips) +
1955 num*sizeof(struct ctdb_public_ip);
1956 ips = talloc_zero_size(outdata, len);
1957 CTDB_NO_MEMORY(ctdb, ips);
1960 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1961 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1964 ips->ips[i].pnn = vnn->pnn;
1965 ips->ips[i].addr = vnn->public_address;
1969 len = offsetof(struct ctdb_all_public_ips, ips) +
1970 i*sizeof(struct ctdb_public_ip);
1972 outdata->dsize = len;
1973 outdata->dptr = (uint8_t *)ips;
1980 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1982 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1983 struct ctdb_req_control *c, TDB_DATA *outdata)
1986 struct ctdb_all_public_ipsv4 *ips;
1987 struct ctdb_vnn *vnn;
1989 /* count how many public ip structures we have */
1991 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1992 if (vnn->public_address.sa.sa_family != AF_INET) {
1998 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
1999 num*sizeof(struct ctdb_public_ipv4);
2000 ips = talloc_zero_size(outdata, len);
2001 CTDB_NO_MEMORY(ctdb, ips);
2003 outdata->dsize = len;
2004 outdata->dptr = (uint8_t *)ips;
2008 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2009 if (vnn->public_address.sa.sa_family != AF_INET) {
2012 ips->ips[i].pnn = vnn->pnn;
2013 ips->ips[i].sin = vnn->public_address.ip;
2020 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2021 struct ctdb_req_control *c,
2026 ctdb_sock_addr *addr;
2027 struct ctdb_control_public_ip_info *info;
2028 struct ctdb_vnn *vnn;
2030 addr = (ctdb_sock_addr *)indata.dptr;
2032 vnn = find_public_ip_vnn(ctdb, addr);
2034 /* if it is not a public ip it could be our 'single ip' */
2035 if (ctdb->single_ip_vnn) {
2036 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2037 vnn = ctdb->single_ip_vnn;
2042 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2043 "'%s'not a public address\n",
2044 ctdb_addr_to_str(addr)));
2048 /* count how many public ip structures we have */
2050 for (;vnn->ifaces[num];) {
2054 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2055 num*sizeof(struct ctdb_control_iface_info);
2056 info = talloc_zero_size(outdata, len);
2057 CTDB_NO_MEMORY(ctdb, info);
2059 info->ip.addr = vnn->public_address;
2060 info->ip.pnn = vnn->pnn;
2061 info->active_idx = 0xFFFFFFFF;
2063 for (i=0; vnn->ifaces[i]; i++) {
2064 struct ctdb_iface *cur;
2066 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2068 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2072 if (vnn->iface == cur) {
2073 info->active_idx = i;
2075 strcpy(info->ifaces[i].name, cur->name);
2076 info->ifaces[i].link_state = cur->link_up;
2077 info->ifaces[i].references = cur->references;
2080 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2081 i*sizeof(struct ctdb_control_iface_info);
2083 outdata->dsize = len;
2084 outdata->dptr = (uint8_t *)info;
2089 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2090 struct ctdb_req_control *c,
2094 struct ctdb_control_get_ifaces *ifaces;
2095 struct ctdb_iface *cur;
2097 /* count how many public ip structures we have */
2099 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2103 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2104 num*sizeof(struct ctdb_control_iface_info);
2105 ifaces = talloc_zero_size(outdata, len);
2106 CTDB_NO_MEMORY(ctdb, ifaces);
2109 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2110 strcpy(ifaces->ifaces[i].name, cur->name);
2111 ifaces->ifaces[i].link_state = cur->link_up;
2112 ifaces->ifaces[i].references = cur->references;
2116 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2117 i*sizeof(struct ctdb_control_iface_info);
2119 outdata->dsize = len;
2120 outdata->dptr = (uint8_t *)ifaces;
2125 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2126 struct ctdb_req_control *c,
2129 struct ctdb_control_iface_info *info;
2130 struct ctdb_iface *iface;
2131 bool link_up = false;
2133 info = (struct ctdb_control_iface_info *)indata.dptr;
2135 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2136 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2137 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2138 len, len, info->name));
2142 switch (info->link_state) {
2150 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2151 (unsigned int)info->link_state));
2155 if (info->references != 0) {
2156 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2157 (unsigned int)info->references));
2161 iface = ctdb_find_iface(ctdb, info->name);
2162 if (iface == NULL) {
2163 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2168 if (link_up == iface->link_up) {
2172 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2173 ("iface[%s] has changed it's link status %s => %s\n",
2175 iface->link_up?"up":"down",
2176 link_up?"up":"down"));
2178 iface->link_up = link_up;
2184 structure containing the listening socket and the list of tcp connections
2185 that the ctdb daemon is to kill
2187 struct ctdb_kill_tcp {
2188 struct ctdb_vnn *vnn;
2189 struct ctdb_context *ctdb;
2191 struct fd_event *fde;
2192 trbt_tree_t *connections;
2197 a tcp connection that is to be killed
2199 struct ctdb_killtcp_con {
2200 ctdb_sock_addr src_addr;
2201 ctdb_sock_addr dst_addr;
2203 struct ctdb_kill_tcp *killtcp;
2206 /* this function is used to create a key to represent this socketpair
2207 in the killtcp tree.
2208 this key is used to insert and lookup matching socketpairs that are
2209 to be tickled and RST
2211 #define KILLTCP_KEYLEN 10
2212 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2214 static uint32_t key[KILLTCP_KEYLEN];
2216 bzero(key, sizeof(key));
2218 if (src->sa.sa_family != dst->sa.sa_family) {
2219 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2223 switch (src->sa.sa_family) {
2225 key[0] = dst->ip.sin_addr.s_addr;
2226 key[1] = src->ip.sin_addr.s_addr;
2227 key[2] = dst->ip.sin_port;
2228 key[3] = src->ip.sin_port;
2231 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2232 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2233 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2234 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2235 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2236 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2237 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2238 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2239 key[8] = dst->ip6.sin6_port;
2240 key[9] = src->ip6.sin6_port;
2243 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2251 called when we get a read event on the raw socket
2253 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2254 uint16_t flags, void *private_data)
2256 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2257 struct ctdb_killtcp_con *con;
2258 ctdb_sock_addr src, dst;
2259 uint32_t ack_seq, seq;
2261 if (!(flags & EVENT_FD_READ)) {
2265 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2266 killtcp->private_data,
2268 &ack_seq, &seq) != 0) {
2269 /* probably a non-tcp ACK packet */
2273 /* check if we have this guy in our list of connections
2276 con = trbt_lookuparray32(killtcp->connections,
2277 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2279 /* no this was some other packet we can just ignore */
2283 /* This one has been tickled !
2284 now reset him and remove him from the list.
2286 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2287 ntohs(con->dst_addr.ip.sin_port),
2288 ctdb_addr_to_str(&con->src_addr),
2289 ntohs(con->src_addr.ip.sin_port)));
2291 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2296 /* when traversing the list of all tcp connections to send tickle acks to
2297 (so that we can capture the ack coming back and kill the connection
2299 this callback is called for each connection we are currently trying to kill
2301 static void tickle_connection_traverse(void *param, void *data)
2303 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2305 /* have tried too many times, just give up */
2306 if (con->count >= 5) {
2307 /* can't delete in traverse: reparent to delete_cons */
2308 talloc_steal(param, con);
2312 /* othervise, try tickling it again */
2315 (ctdb_sock_addr *)&con->dst_addr,
2316 (ctdb_sock_addr *)&con->src_addr,
2322 called every second until all sentenced connections have been reset
2324 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2325 struct timeval t, void *private_data)
2327 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2328 void *delete_cons = talloc_new(NULL);
2330 /* loop over all connections sending tickle ACKs */
2331 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2333 /* now we've finished traverse, it's safe to do deletion. */
2334 talloc_free(delete_cons);
2336 /* If there are no more connections to kill we can remove the
2337 entire killtcp structure
2339 if ( (killtcp->connections == NULL) ||
2340 (killtcp->connections->root == NULL) ) {
2341 talloc_free(killtcp);
2345 /* try tickling them again in a seconds time
2347 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2348 ctdb_tickle_sentenced_connections, killtcp);
2352 destroy the killtcp structure
2354 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2357 killtcp->vnn->killtcp = NULL;
/* insert callback for the killtcp tree: always answer with the new
   connection structure, whatever was stored under the key before.

   An older entry is deliberately not freed here; it stays a talloc
   child of the same killtcp structure and goes away together with it.
 */
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	return replacement;
}
2376 add a tcp socket to the list of connections we want to RST
2378 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2382 ctdb_sock_addr src, dst;
2383 struct ctdb_kill_tcp *killtcp;
2384 struct ctdb_killtcp_con *con;
2385 struct ctdb_vnn *vnn;
2387 ctdb_canonicalize_ip(s, &src);
2388 ctdb_canonicalize_ip(d, &dst);
2390 vnn = find_public_ip_vnn(ctdb, &dst);
2392 vnn = find_public_ip_vnn(ctdb, &src);
2395 /* if it is not a public ip it could be our 'single ip' */
2396 if (ctdb->single_ip_vnn) {
2397 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2398 vnn = ctdb->single_ip_vnn;
2403 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2407 killtcp = vnn->killtcp;
2409 /* If this is the first connection to kill we must allocate
2412 if (killtcp == NULL) {
2413 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2414 CTDB_NO_MEMORY(ctdb, killtcp);
2417 killtcp->ctdb = ctdb;
2418 killtcp->capture_fd = -1;
2419 killtcp->connections = trbt_create(killtcp, 0);
2421 vnn->killtcp = killtcp;
2422 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2427 /* create a structure that describes this connection we want to
2428 RST and store it in killtcp->connections
2430 con = talloc(killtcp, struct ctdb_killtcp_con);
2431 CTDB_NO_MEMORY(ctdb, con);
2432 con->src_addr = src;
2433 con->dst_addr = dst;
2435 con->killtcp = killtcp;
2438 trbt_insertarray32_callback(killtcp->connections,
2439 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2440 add_killtcp_callback, con);
2443 If we dont have a socket to listen on yet we must create it
2445 if (killtcp->capture_fd == -1) {
2446 const char *iface = ctdb_vnn_iface_string(vnn);
2447 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2448 if (killtcp->capture_fd == -1) {
2449 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2450 "socket on iface '%s' for killtcp (%s)\n",
2451 iface, strerror(errno)));
2457 if (killtcp->fde == NULL) {
2458 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2460 capture_tcp_handler, killtcp);
2461 tevent_fd_set_auto_close(killtcp->fde);
2463 /* We also need to set up some events to tickle all these connections
2464 until they are all reset
2466 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2467 ctdb_tickle_sentenced_connections, killtcp);
2470 /* tickle him once now */
2479 talloc_free(vnn->killtcp);
2480 vnn->killtcp = NULL;
2485 kill a TCP connection.
2487 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2489 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2491 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2495 called by a daemon to inform us of the entire list of TCP tickles for
2496 a particular public address.
2497 this control should only be sent by the node that is currently serving
2498 that public address.
2500 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2502 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2503 struct ctdb_tcp_array *tcparray;
2504 struct ctdb_vnn *vnn;
2506 /* We must at least have tickles.num or else we cant verify the size
2507 of the received data blob
2509 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2510 tickles.connections)) {
2511 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2515 /* verify that the size of data matches what we expect */
2516 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2517 tickles.connections)
2518 + sizeof(struct ctdb_tcp_connection)
2519 * list->tickles.num) {
2520 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2524 vnn = find_public_ip_vnn(ctdb, &list->addr);
2526 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2527 ctdb_addr_to_str(&list->addr)));
2532 /* remove any old ticklelist we might have */
2533 talloc_free(vnn->tcp_array);
2534 vnn->tcp_array = NULL;
2536 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2537 CTDB_NO_MEMORY(ctdb, tcparray);
2539 tcparray->num = list->tickles.num;
2541 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2542 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2544 memcpy(tcparray->connections, &list->tickles.connections[0],
2545 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2547 /* We now have a new fresh tickle list array for this vnn */
2548 vnn->tcp_array = talloc_steal(vnn, tcparray);
2554 called to return the full list of tickles for the puclic address associated
2555 with the provided vnn
2557 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2559 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2560 struct ctdb_control_tcp_tickle_list *list;
2561 struct ctdb_tcp_array *tcparray;
2563 struct ctdb_vnn *vnn;
2565 vnn = find_public_ip_vnn(ctdb, addr);
2567 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2568 ctdb_addr_to_str(addr)));
2573 tcparray = vnn->tcp_array;
2575 num = tcparray->num;
2580 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2581 tickles.connections)
2582 + sizeof(struct ctdb_tcp_connection) * num;
2584 outdata->dptr = talloc_size(outdata, outdata->dsize);
2585 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2586 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2589 list->tickles.num = num;
2591 memcpy(&list->tickles.connections[0], tcparray->connections,
2592 sizeof(struct ctdb_tcp_connection) * num);
2600 set the list of all tcp tickles for a public address
2602 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2603 struct timeval timeout, uint32_t destnode,
2604 ctdb_sock_addr *addr,
2605 struct ctdb_tcp_array *tcparray)
2609 struct ctdb_control_tcp_tickle_list *list;
2612 num = tcparray->num;
2617 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2618 tickles.connections) +
2619 sizeof(struct ctdb_tcp_connection) * num;
2620 data.dptr = talloc_size(ctdb, data.dsize);
2621 CTDB_NO_MEMORY(ctdb, data.dptr);
2623 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2625 list->tickles.num = num;
2627 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2630 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2631 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2632 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2634 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2638 talloc_free(data.dptr);
2645 perform tickle updates if required
2647 static void ctdb_update_tcp_tickles(struct event_context *ev,
2648 struct timed_event *te,
2649 struct timeval t, void *private_data)
2651 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2653 struct ctdb_vnn *vnn;
2655 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2656 /* we only send out updates for public addresses that
2659 if (ctdb->pnn != vnn->pnn) {
2662 /* We only send out the updates if we need to */
2663 if (!vnn->tcp_update_needed) {
2666 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2668 CTDB_BROADCAST_CONNECTED,
2669 &vnn->public_address,
2672 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2673 ctdb_addr_to_str(&vnn->public_address)));
2677 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2678 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2679 ctdb_update_tcp_tickles, ctdb);
2684 start periodic update of tcp tickles
2686 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2688 ctdb->tickle_update_context = talloc_new(ctdb);
2690 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2691 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2692 ctdb_update_tcp_tickles, ctdb);
2698 struct control_gratious_arp {
2699 struct ctdb_context *ctdb;
2700 ctdb_sock_addr addr;
2706 send a control_gratuitous arp
2708 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2709 struct timeval t, void *private_data)
2712 struct control_gratious_arp *arp = talloc_get_type(private_data,
2713 struct control_gratious_arp);
2715 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2717 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2718 arp->iface, strerror(errno)));
2723 if (arp->count == CTDB_ARP_REPEAT) {
2728 event_add_timed(arp->ctdb->ev, arp,
2729 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2730 send_gratious_arp, arp);
2737 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2739 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2740 struct control_gratious_arp *arp;
2742 /* verify the size of indata */
2743 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2744 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2745 (unsigned)indata.dsize,
2746 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2750 ( offsetof(struct ctdb_control_gratious_arp, iface)
2751 + gratious_arp->len ) ){
2753 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2754 "but should be %u bytes\n",
2755 (unsigned)indata.dsize,
2756 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2761 arp = talloc(ctdb, struct control_gratious_arp);
2762 CTDB_NO_MEMORY(ctdb, arp);
2765 arp->addr = gratious_arp->addr;
2766 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2767 CTDB_NO_MEMORY(ctdb, arp->iface);
2770 event_add_timed(arp->ctdb->ev, arp,
2771 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler for adding a public address.
 *
 * indata is interpreted as a struct ctdb_control_ip_iface: a fixed part
 * followed by a trailing iface member.  An error is logged when indata
 * is shorter than the fixed part, and a wrong-size error is logged with
 * the received size and the expected size (fixed part plus pub->len).
 *
 * The address, mask and interface name from the request are then handed
 * to ctdb_add_public_address(); a failure of that call is logged.
 *
 * NOTE(review): presumably returns -1 on failure and 0 on success --
 * confirm.
 */
2776 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2778 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2781 /* verify the size of indata */
2782 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2783 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2787 ( offsetof(struct ctdb_control_ip_iface, iface)
2790 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2791 "but should be %u bytes\n",
2792 (unsigned)indata.dsize,
2793 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
/*
 * NOTE(review): pub->iface is passed on as a plain pointer to its first
 * byte; no visible check ensures it is NUL-terminated within pub->len
 * bytes -- confirm that the sender guarantees this.
 */
2797 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2800 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2808 called when releaseip event finishes for del_public_address
/*
 * Completion callback that ctdb_control_del_public_address() hands to
 * ctdb_event_script_callback().  Its only visible action is to free
 * private_data; the ctdb and status arguments are not used in the
 * visible body.  The caller passes its temporary talloc context in the
 * argument slot that follows this callback, presumably as private_data
 * -- confirm.
 */
2810 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2813 talloc_free(private_data);
/*
 * Control handler for deleting a public address.
 *
 * indata is interpreted as a struct ctdb_control_ip_iface and validated
 * the same way as for adding an address: an error is logged when it is
 * shorter than the fixed part, and a wrong-size error is logged with the
 * received size and the expected size (fixed part plus pub->len).
 *
 * The list ctdb->vnn is then searched for an entry whose public_address
 * matches pub->addr according to ctdb_same_ip().  A matching entry is
 * removed from the list.  Entries without an assigned interface are
 * handled separately; for the others a CTDB_EVENT_RELEASE_IP event
 * script is started with the interface name, the address string and the
 * netmask bits as arguments, and the interface is unassigned from the
 * entry.
 *
 * NOTE(review): presumably returns 0 when the address was found and
 * released and -1 otherwise -- confirm.
 */
2816 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2818 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2819 struct ctdb_vnn *vnn;
2822 /* verify the size of indata */
2823 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2824 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2828 ( offsetof(struct ctdb_control_ip_iface, iface)
2831 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2832 "but should be %u bytes\n",
2833 (unsigned)indata.dsize,
2834 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2838 /* walk over all public addresses until we find a match */
2839 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2840 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2841 TALLOC_CTX *mem_ctx;
/*
 * Unlink the entry before anything else is done with it.
 * NOTE(review): the entry is removed while the loop is iterating over
 * the same list; this is only safe if the loop is left before
 * vnn->next is read again -- confirm.
 */
2843 DLIST_REMOVE(ctdb->vnn, vnn);
2844 if (vnn->iface == NULL) {
/*
 * mem_ctx serves both as the talloc parent given to the event script
 * call and as the argument following delete_ip_callback, which frees
 * the pointer it receives.
 * NOTE(review): no NULL check on the talloc_new() result is visible --
 * confirm.
 */
2849 mem_ctx = talloc_new(ctdb);
2850 ret = ctdb_event_script_callback(ctdb,
2851 mem_ctx, delete_ip_callback, mem_ctx,
2853 CTDB_EVENT_RELEASE_IP,
2855 ctdb_vnn_iface_string(vnn),
2856 ctdb_addr_to_str(&vnn->public_address),
2857 vnn->public_netmask_bits);
/*
 * The interface is unassigned only after the event script arguments
 * above have been built from the entry.
 */
2858 ctdb_vnn_unassign_iface(ctdb, vnn);
2870 /* This function is called from the recovery daemon to verify that a remote
2871 node has the expected ip allocation.
2872 This is verified against ctdb->ip_tree
/*
 * Compare the public ip list reported by a remote node (ips) with the
 * expected assignment stored in ctdb->ip_tree.
 *
 * When ctdb->ip_tree is NULL the expected allocation is not known yet
 * and no comparison is made.  Otherwise each of the ips->num entries is
 * looked up in the tree with trbt_lookuparray32(), using IP_KEYLEN and
 * the key derived from the address by ip_key():
 *   - an address without a tree record is logged as an error;
 *   - entries where either the tree record or the reported entry has
 *     pnn == -1 are special-cased (presumably skipped -- confirm);
 *   - a differing pnn is logged as an inconsistent allocation, printing
 *     the address, the reported pnn and the pnn from the tree.
 *
 * NOTE(review): presumably returns 0 when the allocation is consistent
 * and -1 when a reallocation is needed -- confirm.
 */
2874 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2876 struct ctdb_public_ip_list *tmp_ip;
2879 if (ctdb->ip_tree == NULL) {
2880 /* dont know the expected allocation yet, assume remote node
2889 for (i=0; i<ips->num; i++) {
2890 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2891 if (tmp_ip == NULL) {
2892 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2896 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2900 if (tmp_ip->pnn != ips->ips[i].pnn) {
2901 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record the owning node of one public ip in ctdb->ip_tree.
 *
 * An error is logged when ctdb->ip_tree does not exist yet, or when the
 * tree holds no record for ip->addr (looked up with
 * trbt_lookuparray32(), using IP_KEYLEN and the key from ip_key()).
 * Otherwise the pnn stored in the record is replaced by ip->pnn.
 *
 * NOTE(review): presumably returns -1 in the two error cases and 0
 * after a successful update -- confirm.
 */
2909 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2911 struct ctdb_public_ip_list *tmp_ip;
2913 if (ctdb->ip_tree == NULL) {
2914 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2918 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2919 if (tmp_ip == NULL) {
2920 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/*
 * Log the previous and the new owner first, since the assignment below
 * overwrites the previous value.
 */
2924 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2925 tmp_ip->pnn = ip->pnn;