4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Look for an existing entry with this interface name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
420 struct ctdb_do_updateip_state {
421 struct ctdb_req_control *c;
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 if (vnn->iface == old) {
494 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
495 "assin a same interface '%s'\n",
496 ctdb_addr_to_str(&vnn->public_address),
497 vnn->public_netmask_bits,
502 state = talloc(vnn, struct ctdb_do_updateip_state);
503 CTDB_NO_MEMORY(ctdb, state);
505 state->c = talloc_steal(ctdb, c);
509 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
510 "interface %s to %s\n",
511 ctdb_addr_to_str(&vnn->public_address),
512 vnn->public_netmask_bits,
514 ctdb_vnn_iface_string(vnn)));
516 ret = ctdb_event_script_callback(ctdb,
518 ctdb_do_updateip_callback,
521 CTDB_EVENT_UPDATE_IP,
524 ctdb_vnn_iface_string(vnn),
525 ctdb_addr_to_str(&vnn->public_address),
526 vnn->public_netmask_bits);
528 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
529 ctdb_addr_to_str(&vnn->public_address),
530 old->name, ctdb_vnn_iface_string(vnn)));
539 Find the vnn of the node that has a public ip address
540 returns NULL if the address is not known as a public address
542 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
544 struct ctdb_vnn *vnn;
546 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
547 if (ctdb_same_ip(&vnn->public_address, addr)) {
556 take over an ip address
558 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
559 struct ctdb_req_control *c,
564 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
565 struct ctdb_vnn *vnn;
566 bool have_ip = false;
567 bool do_updateip = false;
568 bool do_takeip = false;
569 struct ctdb_iface *best_iface = NULL;
571 if (pip->pnn != ctdb->pnn) {
572 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
573 "with pnn %d, but we're node %d\n",
574 ctdb_addr_to_str(&pip->addr),
575 pip->pnn, ctdb->pnn));
579 /* update our vnn list */
580 vnn = find_public_ip_vnn(ctdb, &pip->addr);
582 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
583 ctdb_addr_to_str(&pip->addr)));
587 have_ip = ctdb_sys_have_ip(&pip->addr);
588 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
589 if (best_iface == NULL) {
590 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
591 "a usable interface (old %s, have_ip %d)\n",
592 ctdb_addr_to_str(&vnn->public_address),
593 vnn->public_netmask_bits,
594 ctdb_vnn_iface_string(vnn),
599 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
600 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
604 if (vnn->iface == NULL && have_ip) {
605 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606 "but we have no interface assigned, has someone manually configured it?"
608 ctdb_addr_to_str(&vnn->public_address)));
613 if (vnn->pnn != ctdb->pnn && have_ip) {
614 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615 "and we have it on iface[%s], but it was assigned to node %d"
616 "and we are node %d, banning ourself\n",
617 ctdb_addr_to_str(&vnn->public_address),
618 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
624 if (vnn->iface->link_up) {
625 /* only move when the rebalance gains something */
626 if (vnn->iface->references > (best_iface->references + 1)) {
629 } else if (vnn->iface != best_iface) {
636 ctdb_vnn_unassign_iface(ctdb, vnn);
643 ret = ctdb_do_takeip(ctdb, c, vnn);
647 } else if (do_updateip) {
648 ret = ctdb_do_updateip(ctdb, c, vnn);
654 * The interface is up and the kernel knows the ip
657 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658 ctdb_addr_to_str(&pip->addr),
659 vnn->public_netmask_bits,
660 ctdb_vnn_iface_string(vnn)));
664 /* tell ctdb_control.c that we will be replying asynchronously */
671 takeover an ip address old v4 style
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
674 struct ctdb_req_control *c,
680 data.dsize = sizeof(struct ctdb_public_ip);
681 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682 CTDB_NO_MEMORY(ctdb, data.dptr);
684 memcpy(data.dptr, indata.dptr, indata.dsize);
685 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
689 kill any clients that are registered with an IP that is being released
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
693 struct ctdb_client_ip *ip;
695 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696 ctdb_addr_to_str(addr)));
698 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699 ctdb_sock_addr tmp_addr;
702 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
704 ctdb_addr_to_str(&ip->addr)));
706 if (ctdb_same_ip(&tmp_addr, addr)) {
707 struct ctdb_client *client = ctdb_reqid_find(ctdb,
710 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
712 ctdb_addr_to_str(&ip->addr),
715 if (client->pid != 0) {
716 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717 (unsigned)client->pid,
718 ctdb_addr_to_str(addr),
720 kill(client->pid, SIGKILL);
727 called when releaseip event finishes
729 static void release_ip_callback(struct ctdb_context *ctdb, int status,
732 struct takeover_callback_state *state =
733 talloc_get_type(private_data, struct takeover_callback_state);
736 if (status == -ETIME) {
740 /* send a message to all clients of this node telling them
741 that the cluster has been reconfigured and they should
742 release any sockets on this IP */
743 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745 data.dsize = strlen((char *)data.dptr)+1;
747 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
749 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
751 /* kill clients that have registered with this IP */
752 release_kill_clients(ctdb, state->addr);
754 ctdb_vnn_unassign_iface(ctdb, state->vnn);
756 /* the control succeeded */
757 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
762 release an ip address
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
765 struct ctdb_req_control *c,
770 struct takeover_callback_state *state;
771 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772 struct ctdb_vnn *vnn;
774 /* update our vnn list */
775 vnn = find_public_ip_vnn(ctdb, &pip->addr);
777 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778 ctdb_addr_to_str(&pip->addr)));
783 /* stop any previous arps */
784 talloc_free(vnn->takeover_ctx);
785 vnn->takeover_ctx = NULL;
787 if (!ctdb_sys_have_ip(&pip->addr)) {
788 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
789 ctdb_addr_to_str(&pip->addr),
790 vnn->public_netmask_bits,
791 ctdb_vnn_iface_string(vnn)));
792 ctdb_vnn_unassign_iface(ctdb, vnn);
796 if (vnn->iface == NULL) {
797 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798 "but we have no interface assigned, has someone manually configured it?"
800 ctdb_addr_to_str(&vnn->public_address)));
805 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn),
811 state = talloc(ctdb, struct takeover_callback_state);
812 CTDB_NO_MEMORY(ctdb, state);
814 state->c = talloc_steal(state, c);
815 state->addr = talloc(state, ctdb_sock_addr);
816 CTDB_NO_MEMORY(ctdb, state->addr);
817 *state->addr = pip->addr;
820 ret = ctdb_event_script_callback(ctdb,
821 state, release_ip_callback, state,
823 CTDB_EVENT_RELEASE_IP,
825 ctdb_vnn_iface_string(vnn),
826 ctdb_addr_to_str(&pip->addr),
827 vnn->public_netmask_bits);
829 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830 ctdb_addr_to_str(&pip->addr),
831 ctdb_vnn_iface_string(vnn)));
836 /* tell the control that we will be replying asynchronously */
842 release an ip address old v4 style
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
845 struct ctdb_req_control *c,
851 data.dsize = sizeof(struct ctdb_public_ip);
852 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853 CTDB_NO_MEMORY(ctdb, data.dptr);
855 memcpy(data.dptr, indata.dptr, indata.dsize);
856 return ctdb_control_release_ip(ctdb, c, data, async_reply);
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861 ctdb_sock_addr *addr,
862 unsigned mask, const char *ifaces)
864 struct ctdb_vnn *vnn;
871 /* Verify that we don't have an entry for this ip yet */
872 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
875 ctdb_addr_to_str(addr)));
880 /* create a new vnn structure for this ip address */
881 vnn = talloc_zero(ctdb, struct ctdb_vnn);
882 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884 tmp = talloc_strdup(vnn, ifaces);
885 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
894 vnn->ifaces[num] = NULL;
895 vnn->public_address = *addr;
896 vnn->public_netmask_bits = mask;
899 for (i=0; vnn->ifaces[i]; i++) {
900 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
902 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903 "for public_address[%s]\n",
904 vnn->ifaces[i], ctdb_addr_to_str(addr)));
909 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
913 DLIST_ADD(ctdb->vnn, vnn);
919 setup the event script directory
921 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
923 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
924 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
929 setup the public address lists from a file
931 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
937 lines = file_lines_load(alist, &nlines, ctdb);
939 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
942 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
946 for (i=0;i<nlines;i++) {
954 while ((*line == ' ') || (*line == '\t')) {
960 if (strcmp(line, "") == 0) {
963 tok = strtok(line, " \t");
965 tok = strtok(NULL, " \t");
967 if (NULL == ctdb->default_public_interface) {
968 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
973 ifaces = ctdb->default_public_interface;
978 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
979 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
983 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
984 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
994 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
998 struct ctdb_vnn *svnn;
1002 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1003 CTDB_NO_MEMORY(ctdb, svnn);
1005 svnn->ifaces = talloc_array(svnn, const char *, 2);
1006 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1007 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1008 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1009 svnn->ifaces[1] = NULL;
1011 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1017 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1019 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1020 "for single_ip[%s]\n",
1022 ctdb_addr_to_str(&svnn->public_address)));
1027 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1033 ctdb->single_ip_vnn = svnn;
1037 struct ctdb_public_ip_list {
1038 struct ctdb_public_ip_list *next;
1040 ctdb_sock_addr addr;
1044 /* Given a physical node, return the number of
1045 public addresses that are currently assigned to this node.
1047 static int node_ip_coverage(struct ctdb_context *ctdb,
1049 struct ctdb_public_ip_list *ips)
1053 for (;ips;ips=ips->next) {
1054 if (ips->pnn == pnn) {
1062 /* Check if this is a public ip known to the node, i.e. can that
1063 node takeover this ip ?
1065 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1066 struct ctdb_public_ip_list *ip)
1068 struct ctdb_all_public_ips *public_ips;
1071 public_ips = ctdb->nodes[pnn]->available_public_ips;
1073 if (public_ips == NULL) {
1077 for (i=0;i<public_ips->num;i++) {
1078 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1079 /* yes, this node can serve this public ip */
1088 /* search the node list for a node to take over this ip.
1089 pick the node that is currently serving the fewest ips
1090 so that the ips get spread out evenly.
1092 static int find_takeover_node(struct ctdb_context *ctdb,
1093 struct ctdb_node_map *nodemap, uint32_t mask,
1094 struct ctdb_public_ip_list *ip,
1095 struct ctdb_public_ip_list *all_ips)
1097 int pnn, min=0, num;
1101 for (i=0;i<nodemap->num;i++) {
1102 if (nodemap->nodes[i].flags & mask) {
1103 /* This node is not healthy and can not be used to serve
1109 /* verify that this node can serve this ip */
1110 if (can_node_serve_ip(ctdb, i, ip)) {
1111 /* no it couldnt so skip to the next node */
1115 num = node_ip_coverage(ctdb, i, all_ips);
1116 /* was this the first node we checked ? */
1128 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1129 ctdb_addr_to_str(&ip->addr)));
1139 static uint32_t *ip_key(ctdb_sock_addr *ip)
1141 static uint32_t key[IP_KEYLEN];
1143 bzero(key, sizeof(key));
1145 switch (ip->sa.sa_family) {
1147 key[3] = htonl(ip->ip.sin_addr.s_addr);
1150 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1151 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1152 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1153 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1156 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1163 static void *add_ip_callback(void *parm, void *data)
1168 void getips_count_callback(void *param, void *data)
1170 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1171 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1173 new_ip->next = *ip_list;
1177 static struct ctdb_public_ip_list *
1178 create_merged_ip_list(struct ctdb_context *ctdb)
1181 struct ctdb_public_ip_list *ip_list;
1182 struct ctdb_all_public_ips *public_ips;
1184 if (ctdb->ip_tree != NULL) {
1185 talloc_free(ctdb->ip_tree);
1186 ctdb->ip_tree = NULL;
1188 ctdb->ip_tree = trbt_create(ctdb, 0);
1190 for (i=0;i<ctdb->num_nodes;i++) {
1191 public_ips = ctdb->nodes[i]->known_public_ips;
1193 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1197 /* there were no public ips for this node */
1198 if (public_ips == NULL) {
1202 for (j=0;j<public_ips->num;j++) {
1203 struct ctdb_public_ip_list *tmp_ip;
1205 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1206 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1207 tmp_ip->pnn = public_ips->ips[j].pnn;
1208 tmp_ip->addr = public_ips->ips[j].addr;
1209 tmp_ip->next = NULL;
1211 trbt_insertarray32_callback(ctdb->ip_tree,
1212 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1219 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1225 make any IP alias changes for public addresses that are necessary
1227 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1229 int i, num_healthy, retries;
1230 struct ctdb_public_ip ip;
1231 struct ctdb_public_ipv4 ipv4;
1232 uint32_t mask, *nodes;
1233 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1234 int maxnode, maxnum=0, minnode, minnum=0, num;
1236 struct timeval timeout;
1237 struct client_async_data *async_data;
1238 struct ctdb_client_control_state *state;
1239 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1242 * ip failover is completely disabled, just send out the
1243 * ipreallocated event.
1245 if (ctdb->tunable.disable_ip_failover != 0) {
1251 /* Count how many completely healthy nodes we have */
1253 for (i=0;i<nodemap->num;i++) {
1254 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1259 if (num_healthy > 0) {
1260 /* We have healthy nodes, so only consider them for
1261 serving public addresses
1263 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1265 /* We didnt have any completely healthy nodes so
1266 use "disabled" nodes as a fallback
1268 mask = NODE_FLAGS_INACTIVE;
1271 /* since nodes only know about those public addresses that
1272 can be served by that particular node, no single node has
1273 a full list of all public addresses that exist in the cluster.
1274 Walk over all node structures and create a merged list of
1275 all public addresses that exist in the cluster.
1277 keep the tree of ips around as ctdb->ip_tree
1279 all_ips = create_merged_ip_list(ctdb);
1281 /* If we want deterministic ip allocations, i.e. that the ip addresses
1282 will always be allocated the same way for a specific set of
1283 available/unavailable nodes.
1285 if (1 == ctdb->tunable.deterministic_public_ips) {
1286 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1287 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1288 tmp_ip->pnn = i%nodemap->num;
1293 /* mark all public addresses with a masked node as being served by
1296 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1297 if (tmp_ip->pnn == -1) {
1300 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1305 /* verify that the assigned nodes can serve that public ip
1306 and set it to -1 if not
1308 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1309 if (tmp_ip->pnn == -1) {
1312 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1313 /* this node can not serve this ip. */
1319 /* now we must redistribute all public addresses with takeover node
1320 -1 among the nodes available
1324 /* loop over all ip's and find a physical node to cover for
1327 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1328 if (tmp_ip->pnn == -1) {
1329 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1330 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1331 ctdb_addr_to_str(&tmp_ip->addr)));
1336 /* If we don't want ips to fail back after a node becomes healthy
1337 again, we won't even try to reallocate the ip addresses so that
1338 they are evenly spread out.
1339 This can NOT be used at the same time as DeterministicIPs !
1341 if (1 == ctdb->tunable.no_ip_failback) {
1342 if (1 == ctdb->tunable.deterministic_public_ips) {
1343 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1349 /* now, try to make sure the ip addresses are evenly distributed
1351 for each ip address, loop over all nodes that can serve this
1352 ip and make sure that the difference between the node
1353 serving the most and the node serving the fewest ips is not greater
1356 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1357 if (tmp_ip->pnn == -1) {
1361 /* Get the highest and lowest number of ips served by any
1362 valid node which can serve this ip.
1366 for (i=0;i<nodemap->num;i++) {
1367 if (nodemap->nodes[i].flags & mask) {
1371 /* only check nodes that can actually serve this ip */
1372 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1373 /* no it couldnt so skip to the next node */
1377 num = node_ip_coverage(ctdb, i, all_ips);
1378 if (maxnode == -1) {
1387 if (minnode == -1) {
1397 if (maxnode == -1) {
1398 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1399 ctdb_addr_to_str(&tmp_ip->addr)));
1404 /* If we want deterministic IPs then dont try to reallocate
1405 them to spread out the load.
1407 if (1 == ctdb->tunable.deterministic_public_ips) {
1411 /* if the spread between the smallest and largest coverage by
1412 a node is >=2 we steal one of the ips from the node with
1413 most coverage to even things out a bit.
1414 try to do this at most 5 times since we dont want to spend
1415 too much time balancing the ip coverage.
1417 if ( (maxnum > minnum+1)
1419 struct ctdb_public_ip_list *tmp;
1421 /* mark one of maxnode's vnn's as unassigned and try
1424 for (tmp=all_ips;tmp;tmp=tmp->next) {
1425 if (tmp->pnn == maxnode) {
1435 /* finished distributing the public addresses, now just send the
1436 info out to the nodes
1440 /* at this point ->pnn is the node which will own each IP
1441 or -1 if there is no node that can cover this ip
1444 /* now tell all nodes to delete any alias that they should not
1445 have. This will be a NOOP on nodes that don't currently
1446 hold the given alias */
1447 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1448 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1450 for (i=0;i<nodemap->num;i++) {
1451 /* don't talk to unconnected nodes, but do talk to banned nodes */
1452 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1456 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1457 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1458 /* This node should be serving this
1459 vnn so dont tell it to release the ip
1463 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1464 ipv4.pnn = tmp_ip->pnn;
1465 ipv4.sin = tmp_ip->addr.ip;
1467 timeout = TAKEOVER_TIMEOUT();
1468 data.dsize = sizeof(ipv4);
1469 data.dptr = (uint8_t *)&ipv4;
1470 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1471 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1475 ip.pnn = tmp_ip->pnn;
1476 ip.addr = tmp_ip->addr;
1478 timeout = TAKEOVER_TIMEOUT();
1479 data.dsize = sizeof(ip);
1480 data.dptr = (uint8_t *)&ip;
1481 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1482 0, CTDB_CONTROL_RELEASE_IP, 0,
1487 if (state == NULL) {
1488 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1489 talloc_free(tmp_ctx);
1493 ctdb_client_async_add(async_data, state);
1496 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1497 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1498 talloc_free(tmp_ctx);
1501 talloc_free(async_data);
1504 /* tell all nodes to get their own IPs */
1505 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1506 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1507 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1508 if (tmp_ip->pnn == -1) {
1509 /* this IP won't be taken over */
1513 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1514 ipv4.pnn = tmp_ip->pnn;
1515 ipv4.sin = tmp_ip->addr.ip;
1517 timeout = TAKEOVER_TIMEOUT();
1518 data.dsize = sizeof(ipv4);
1519 data.dptr = (uint8_t *)&ipv4;
1520 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1521 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1525 ip.pnn = tmp_ip->pnn;
1526 ip.addr = tmp_ip->addr;
1528 timeout = TAKEOVER_TIMEOUT();
1529 data.dsize = sizeof(ip);
1530 data.dptr = (uint8_t *)&ip;
1531 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1532 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1536 if (state == NULL) {
1537 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1538 talloc_free(tmp_ctx);
1542 ctdb_client_async_add(async_data, state);
1544 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1545 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1546 talloc_free(tmp_ctx);
1551 /* tell all nodes to update natgw */
1552 /* send the flags update natgw on all connected nodes */
1553 data.dptr = discard_const("ipreallocated");
1554 data.dsize = strlen((char *)data.dptr) + 1;
1555 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1556 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1557 nodes, 0, TAKEOVER_TIMEOUT(),
1561 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1564 talloc_free(tmp_ctx);
1570 destroy a ctdb_client_ip structure
1572 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1574 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1575 ctdb_addr_to_str(&ip->addr),
1576 ntohs(ip->addr.ip.sin_port),
1579 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1584 called by a client to inform us of a TCP connection that it is managing
1585 and that should be tickled with an ACK when IP takeover is done
1586 we handle both the old ipv4 style of packets as well as the new ipv4/6
1589 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1592 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1593 struct ctdb_control_tcp *old_addr = NULL;
1594 struct ctdb_control_tcp_addr new_addr;
1595 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1596 struct ctdb_tcp_list *tcp;
1597 struct ctdb_tcp_connection t;
1600 struct ctdb_client_ip *ip;
1601 struct ctdb_vnn *vnn;
1602 ctdb_sock_addr addr;
1604 switch (indata.dsize) {
1605 case sizeof(struct ctdb_control_tcp):
1606 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1607 ZERO_STRUCT(new_addr);
1608 tcp_sock = &new_addr;
1609 tcp_sock->src.ip = old_addr->src;
1610 tcp_sock->dest.ip = old_addr->dest;
1612 case sizeof(struct ctdb_control_tcp_addr):
1613 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1616 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1617 "to ctdb_control_tcp_client. size was %d but "
1618 "only allowed sizes are %lu and %lu\n",
1620 (long unsigned)sizeof(struct ctdb_control_tcp),
1621 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1625 addr = tcp_sock->src;
1626 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1627 addr = tcp_sock->dest;
1628 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1631 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1632 vnn = find_public_ip_vnn(ctdb, &addr);
1634 switch (addr.sa.sa_family) {
1636 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1637 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1638 ctdb_addr_to_str(&addr)));
1642 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1643 ctdb_addr_to_str(&addr)));
1646 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1652 if (vnn->pnn != ctdb->pnn) {
1653 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1654 ctdb_addr_to_str(&addr),
1655 client_id, client->pid));
1656 /* failing this call will tell smbd to die */
1660 ip = talloc(client, struct ctdb_client_ip);
1661 CTDB_NO_MEMORY(ctdb, ip);
1665 ip->client_id = client_id;
1666 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1667 DLIST_ADD(ctdb->client_ip_list, ip);
1669 tcp = talloc(client, struct ctdb_tcp_list);
1670 CTDB_NO_MEMORY(ctdb, tcp);
1672 tcp->connection.src_addr = tcp_sock->src;
1673 tcp->connection.dst_addr = tcp_sock->dest;
1675 DLIST_ADD(client->tcp_list, tcp);
1677 t.src_addr = tcp_sock->src;
1678 t.dst_addr = tcp_sock->dest;
1680 data.dptr = (uint8_t *)&t;
1681 data.dsize = sizeof(t);
1683 switch (addr.sa.sa_family) {
1685 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1686 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1687 ctdb_addr_to_str(&tcp_sock->src),
1688 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1691 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1692 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1693 ctdb_addr_to_str(&tcp_sock->src),
1694 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1697 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1701 /* tell all nodes about this tcp connection */
1702 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1703 CTDB_CONTROL_TCP_ADD,
1704 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1706 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1714 find a tcp address on a list
1716 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1717 struct ctdb_tcp_connection *tcp)
1721 if (array == NULL) {
1725 for (i=0;i<array->num;i++) {
1726 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1727 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1728 return &array->connections[i];
1737 called by a daemon to inform us of a TCP connection that one of its
1738 clients is managing and that should be tickled with an ACK when IP takeover is
1741 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1743 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1744 struct ctdb_tcp_array *tcparray;
1745 struct ctdb_tcp_connection tcp;
1746 struct ctdb_vnn *vnn;
1748 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1750 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1751 ctdb_addr_to_str(&p->dst_addr)));
1757 tcparray = vnn->tcp_array;
1759 /* If this is the first tickle */
1760 if (tcparray == NULL) {
1761 tcparray = talloc_size(ctdb->nodes,
1762 offsetof(struct ctdb_tcp_array, connections) +
1763 sizeof(struct ctdb_tcp_connection) * 1);
1764 CTDB_NO_MEMORY(ctdb, tcparray);
1765 vnn->tcp_array = tcparray;
1768 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1769 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1771 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1772 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1775 if (tcp_update_needed) {
1776 vnn->tcp_update_needed = true;
1782 /* Do we already have this tickle ?*/
1783 tcp.src_addr = p->src_addr;
1784 tcp.dst_addr = p->dst_addr;
1785 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1786 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1787 ctdb_addr_to_str(&tcp.dst_addr),
1788 ntohs(tcp.dst_addr.ip.sin_port),
1793 /* A new tickle, we must add it to the array */
1794 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1795 struct ctdb_tcp_connection,
1797 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1799 vnn->tcp_array = tcparray;
1800 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1801 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1804 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1805 ctdb_addr_to_str(&tcp.dst_addr),
1806 ntohs(tcp.dst_addr.ip.sin_port),
1809 if (tcp_update_needed) {
1810 vnn->tcp_update_needed = true;
1818 called by a daemon to inform us of a TCP connection that one of its
1819 clients is managing and that should be tickled with an ACK when IP takeover is
1822 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1824 struct ctdb_tcp_connection *tcpp;
1825 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1828 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1829 ctdb_addr_to_str(&conn->dst_addr)));
1833 /* if the array is empty we can't remove it
1834 and we don't need to do anything
1836 if (vnn->tcp_array == NULL) {
1837 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1838 ctdb_addr_to_str(&conn->dst_addr),
1839 ntohs(conn->dst_addr.ip.sin_port)));
1844 /* See if we know this connection
1845 if we don't know this connection then we don't need to do anything
1847 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1849 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1850 ctdb_addr_to_str(&conn->dst_addr),
1851 ntohs(conn->dst_addr.ip.sin_port)));
1856 /* We need to remove this entry from the array.
1857 Instead of allocating a new array and copying data to it
1858 we cheat and just copy the last entry in the existing array
1859 to the entry that is to be removed and just shrink the
1862 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1863 vnn->tcp_array->num--;
1865 /* If we deleted the last entry we also need to remove the entire array
1867 if (vnn->tcp_array->num == 0) {
1868 talloc_free(vnn->tcp_array);
1869 vnn->tcp_array = NULL;
1872 vnn->tcp_update_needed = true;
1874 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1875 ctdb_addr_to_str(&conn->src_addr),
1876 ntohs(conn->src_addr.ip.sin_port)));
1881 called by a daemon to inform us of a TCP connection that one of its
1882 clients used is no longer needed in the tickle database
1884 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1886 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1888 ctdb_remove_tcp_connection(ctdb, conn);
1895 called when a daemon restarts - send all tickles for all public addresses
1896 we are serving immediately to the new node.
1898 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1900 /*XXX here we should send all tickles we are serving to the new node */
1906 called when a client structure goes away - hook to remove
1907 elements from the tcp_list in all daemons
1909 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1911 while (client->tcp_list) {
1912 struct ctdb_tcp_list *tcp = client->tcp_list;
1913 DLIST_REMOVE(client->tcp_list, tcp);
1914 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1920 release all IPs on shutdown
1922 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1924 struct ctdb_vnn *vnn;
1926 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1927 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1928 ctdb_vnn_unassign_iface(ctdb, vnn);
1934 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1935 ctdb_vnn_iface_string(vnn),
1936 ctdb_addr_to_str(&vnn->public_address),
1937 vnn->public_netmask_bits);
1938 release_kill_clients(ctdb, &vnn->public_address);
1939 ctdb_vnn_unassign_iface(ctdb, vnn);
1945 get list of public IPs
1947 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1948 struct ctdb_req_control *c, TDB_DATA *outdata)
1951 struct ctdb_all_public_ips *ips;
1952 struct ctdb_vnn *vnn;
1953 bool only_available = false;
1955 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1956 only_available = true;
1959 /* count how many public ip structures we have */
1961 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1965 len = offsetof(struct ctdb_all_public_ips, ips) +
1966 num*sizeof(struct ctdb_public_ip);
1967 ips = talloc_zero_size(outdata, len);
1968 CTDB_NO_MEMORY(ctdb, ips);
1971 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1972 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1975 ips->ips[i].pnn = vnn->pnn;
1976 ips->ips[i].addr = vnn->public_address;
1980 len = offsetof(struct ctdb_all_public_ips, ips) +
1981 i*sizeof(struct ctdb_public_ip);
1983 outdata->dsize = len;
1984 outdata->dptr = (uint8_t *)ips;
1991 get list of public IPs, old ipv4 style. only returns ipv4 addresses
1993 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1994 struct ctdb_req_control *c, TDB_DATA *outdata)
1997 struct ctdb_all_public_ipsv4 *ips;
1998 struct ctdb_vnn *vnn;
2000 /* count how many public ip structures we have */
2002 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2003 if (vnn->public_address.sa.sa_family != AF_INET) {
2009 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2010 num*sizeof(struct ctdb_public_ipv4);
2011 ips = talloc_zero_size(outdata, len);
2012 CTDB_NO_MEMORY(ctdb, ips);
2014 outdata->dsize = len;
2015 outdata->dptr = (uint8_t *)ips;
2019 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2020 if (vnn->public_address.sa.sa_family != AF_INET) {
2023 ips->ips[i].pnn = vnn->pnn;
2024 ips->ips[i].sin = vnn->public_address.ip;
2031 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2032 struct ctdb_req_control *c,
2037 ctdb_sock_addr *addr;
2038 struct ctdb_control_public_ip_info *info;
2039 struct ctdb_vnn *vnn;
2041 addr = (ctdb_sock_addr *)indata.dptr;
2043 vnn = find_public_ip_vnn(ctdb, addr);
2045 /* if it is not a public ip it could be our 'single ip' */
2046 if (ctdb->single_ip_vnn) {
2047 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2048 vnn = ctdb->single_ip_vnn;
2053 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2054 "'%s'not a public address\n",
2055 ctdb_addr_to_str(addr)));
2059 /* count how many interfaces this public ip has */
2061 for (;vnn->ifaces[num];) {
2065 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2066 num*sizeof(struct ctdb_control_iface_info);
2067 info = talloc_zero_size(outdata, len);
2068 CTDB_NO_MEMORY(ctdb, info);
2070 info->ip.addr = vnn->public_address;
2071 info->ip.pnn = vnn->pnn;
2072 info->active_idx = 0xFFFFFFFF;
2074 for (i=0; vnn->ifaces[i]; i++) {
2075 struct ctdb_iface *cur;
2077 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2079 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2083 if (vnn->iface == cur) {
2084 info->active_idx = i;
2086 strcpy(info->ifaces[i].name, cur->name);
2087 info->ifaces[i].link_state = cur->link_up;
2088 info->ifaces[i].references = cur->references;
2091 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2092 i*sizeof(struct ctdb_control_iface_info);
2094 outdata->dsize = len;
2095 outdata->dptr = (uint8_t *)info;
2100 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2101 struct ctdb_req_control *c,
2105 struct ctdb_control_get_ifaces *ifaces;
2106 struct ctdb_iface *cur;
2108 /* count how many interfaces we have */
2110 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2114 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2115 num*sizeof(struct ctdb_control_iface_info);
2116 ifaces = talloc_zero_size(outdata, len);
2117 CTDB_NO_MEMORY(ctdb, ifaces);
2120 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2121 strcpy(ifaces->ifaces[i].name, cur->name);
2122 ifaces->ifaces[i].link_state = cur->link_up;
2123 ifaces->ifaces[i].references = cur->references;
2127 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2128 i*sizeof(struct ctdb_control_iface_info);
2130 outdata->dsize = len;
2131 outdata->dptr = (uint8_t *)ifaces;
2136 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2137 struct ctdb_req_control *c,
2140 struct ctdb_control_iface_info *info;
2141 struct ctdb_iface *iface;
2142 bool link_up = false;
2144 info = (struct ctdb_control_iface_info *)indata.dptr;
2146 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2147 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2148 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2149 len, len, info->name));
2153 switch (info->link_state) {
2161 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2162 (unsigned int)info->link_state));
2166 if (info->references != 0) {
2167 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2168 (unsigned int)info->references));
2172 iface = ctdb_find_iface(ctdb, info->name);
2173 if (iface == NULL) {
2174 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2179 if (link_up == iface->link_up) {
2183 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2184 ("iface[%s] has changed it's link status %s => %s\n",
2186 iface->link_up?"up":"down",
2187 link_up?"up":"down"));
2189 iface->link_up = link_up;
2195 structure containing the listening socket and the list of tcp connections
2196 that the ctdb daemon is to kill
2198 struct ctdb_kill_tcp {
2199 struct ctdb_vnn *vnn;
2200 struct ctdb_context *ctdb;
2202 struct fd_event *fde;
2203 trbt_tree_t *connections;
2208 a tcp connection that is to be killed
2210 struct ctdb_killtcp_con {
2211 ctdb_sock_addr src_addr;
2212 ctdb_sock_addr dst_addr;
2214 struct ctdb_kill_tcp *killtcp;
2217 /* this function is used to create a key to represent this socketpair
2218 in the killtcp tree.
2219 this key is used to insert and lookup matching socketpairs that are
2220 to be tickled and RST
2222 #define KILLTCP_KEYLEN 10
2223 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2225 static uint32_t key[KILLTCP_KEYLEN];
2227 bzero(key, sizeof(key));
2229 if (src->sa.sa_family != dst->sa.sa_family) {
2230 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2234 switch (src->sa.sa_family) {
2236 key[0] = dst->ip.sin_addr.s_addr;
2237 key[1] = src->ip.sin_addr.s_addr;
2238 key[2] = dst->ip.sin_port;
2239 key[3] = src->ip.sin_port;
2242 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2243 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2244 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2245 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2246 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2247 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2248 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2249 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2250 key[8] = dst->ip6.sin6_port;
2251 key[9] = src->ip6.sin6_port;
2254 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2262 called when we get a read event on the raw socket
2264 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2265 uint16_t flags, void *private_data)
2267 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2268 struct ctdb_killtcp_con *con;
2269 ctdb_sock_addr src, dst;
2270 uint32_t ack_seq, seq;
2272 if (!(flags & EVENT_FD_READ)) {
2276 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2277 killtcp->private_data,
2279 &ack_seq, &seq) != 0) {
2280 /* probably a non-tcp ACK packet */
2284 /* check if we have this guy in our list of connections
2287 con = trbt_lookuparray32(killtcp->connections,
2288 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2290 /* no this was some other packet we can just ignore */
2294 /* This one has been tickled !
2295 now reset him and remove him from the list.
2297 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2298 ntohs(con->dst_addr.ip.sin_port),
2299 ctdb_addr_to_str(&con->src_addr),
2300 ntohs(con->src_addr.ip.sin_port)));
2302 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2307 /* when traversing the list of all tcp connections to send tickle acks to
2308 (so that we can capture the ack coming back and kill the connection
2310 this callback is called for each connection we are currently trying to kill
2312 static void tickle_connection_traverse(void *param, void *data)
2314 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2316 /* have tried too many times, just give up */
2317 if (con->count >= 5) {
2318 /* can't delete in traverse: reparent to delete_cons */
2319 talloc_steal(param, con);
2323 /* otherwise, try tickling it again */
2326 (ctdb_sock_addr *)&con->dst_addr,
2327 (ctdb_sock_addr *)&con->src_addr,
2333 called every second until all sentenced connections have been reset
2335 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2336 struct timeval t, void *private_data)
2338 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2339 void *delete_cons = talloc_new(NULL);
2341 /* loop over all connections sending tickle ACKs */
2342 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2344 /* now we've finished traverse, it's safe to do deletion. */
2345 talloc_free(delete_cons);
2347 /* If there are no more connections to kill we can remove the
2348 entire killtcp structure
2350 if ( (killtcp->connections == NULL) ||
2351 (killtcp->connections->root == NULL) ) {
2352 talloc_free(killtcp);
2356 /* try tickling them again in a second's time
2358 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2359 ctdb_tickle_sentenced_connections, killtcp);
2363 destroy the killtcp structure
2365 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2368 killtcp->vnn->killtcp = NULL;
2374 /* nothing fancy here, just unconditionally replace any existing
2375 connection structure with the new one.
2377 dont even free the old one if it did exist, that one is talloc_stolen
2378 by the same node in the tree anyway and will be deleted when the new data
2381 static void *add_killtcp_callback(void *parm, void *data)
2387 add a tcp socket to the list of connections we want to RST
2389 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2393 ctdb_sock_addr src, dst;
2394 struct ctdb_kill_tcp *killtcp;
2395 struct ctdb_killtcp_con *con;
2396 struct ctdb_vnn *vnn;
2398 ctdb_canonicalize_ip(s, &src);
2399 ctdb_canonicalize_ip(d, &dst);
2401 vnn = find_public_ip_vnn(ctdb, &dst);
2403 vnn = find_public_ip_vnn(ctdb, &src);
2406 /* if it is not a public ip it could be our 'single ip' */
2407 if (ctdb->single_ip_vnn) {
2408 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2409 vnn = ctdb->single_ip_vnn;
2414 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2418 killtcp = vnn->killtcp;
2420 /* If this is the first connection to kill we must allocate
2423 if (killtcp == NULL) {
2424 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2425 CTDB_NO_MEMORY(ctdb, killtcp);
2428 killtcp->ctdb = ctdb;
2429 killtcp->capture_fd = -1;
2430 killtcp->connections = trbt_create(killtcp, 0);
2432 vnn->killtcp = killtcp;
2433 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2438 /* create a structure that describes this connection we want to
2439 RST and store it in killtcp->connections
2441 con = talloc(killtcp, struct ctdb_killtcp_con);
2442 CTDB_NO_MEMORY(ctdb, con);
2443 con->src_addr = src;
2444 con->dst_addr = dst;
2446 con->killtcp = killtcp;
2449 trbt_insertarray32_callback(killtcp->connections,
2450 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2451 add_killtcp_callback, con);
2454 If we dont have a socket to listen on yet we must create it
2456 if (killtcp->capture_fd == -1) {
2457 const char *iface = ctdb_vnn_iface_string(vnn);
2458 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2459 if (killtcp->capture_fd == -1) {
2460 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2461 "socket on iface '%s' for killtcp (%s)\n",
2462 iface, strerror(errno)));
2468 if (killtcp->fde == NULL) {
2469 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2471 capture_tcp_handler, killtcp);
2472 tevent_fd_set_auto_close(killtcp->fde);
2474 /* We also need to set up some events to tickle all these connections
2475 until they are all reset
2477 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2478 ctdb_tickle_sentenced_connections, killtcp);
2481 /* tickle him once now */
2490 talloc_free(vnn->killtcp);
2491 vnn->killtcp = NULL;
2496 kill a TCP connection.
2498 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2500 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2502 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2506 called by a daemon to inform us of the entire list of TCP tickles for
2507 a particular public address.
2508 this control should only be sent by the node that is currently serving
2509 that public address.
2511 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2513 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2514 struct ctdb_tcp_array *tcparray;
2515 struct ctdb_vnn *vnn;
2517 /* We must at least have tickles.num or else we can't verify the size
2518 of the received data blob
2520 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2521 tickles.connections)) {
2522 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2526 /* verify that the size of data matches what we expect */
2527 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2528 tickles.connections)
2529 + sizeof(struct ctdb_tcp_connection)
2530 * list->tickles.num) {
2531 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2535 vnn = find_public_ip_vnn(ctdb, &list->addr);
2537 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2538 ctdb_addr_to_str(&list->addr)));
2543 /* remove any old ticklelist we might have */
2544 talloc_free(vnn->tcp_array);
2545 vnn->tcp_array = NULL;
2547 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2548 CTDB_NO_MEMORY(ctdb, tcparray);
2550 tcparray->num = list->tickles.num;
2552 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2553 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2555 memcpy(tcparray->connections, &list->tickles.connections[0],
2556 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2558 /* We now have a new fresh tickle list array for this vnn */
2559 vnn->tcp_array = talloc_steal(vnn, tcparray);
2565 called to return the full list of tickles for the public address associated
2566 with the provided vnn
2568 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2570 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2571 struct ctdb_control_tcp_tickle_list *list;
2572 struct ctdb_tcp_array *tcparray;
2574 struct ctdb_vnn *vnn;
2576 vnn = find_public_ip_vnn(ctdb, addr);
2578 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2579 ctdb_addr_to_str(addr)));
2584 tcparray = vnn->tcp_array;
2586 num = tcparray->num;
2591 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2592 tickles.connections)
2593 + sizeof(struct ctdb_tcp_connection) * num;
2595 outdata->dptr = talloc_size(outdata, outdata->dsize);
2596 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2597 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2600 list->tickles.num = num;
2602 memcpy(&list->tickles.connections[0], tcparray->connections,
2603 sizeof(struct ctdb_tcp_connection) * num);
2611 set the list of all tcp tickles for a public address
2613 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2614 struct timeval timeout, uint32_t destnode,
2615 ctdb_sock_addr *addr,
2616 struct ctdb_tcp_array *tcparray)
2620 struct ctdb_control_tcp_tickle_list *list;
2623 num = tcparray->num;
2628 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2629 tickles.connections) +
2630 sizeof(struct ctdb_tcp_connection) * num;
2631 data.dptr = talloc_size(ctdb, data.dsize);
2632 CTDB_NO_MEMORY(ctdb, data.dptr);
2634 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2636 list->tickles.num = num;
2638 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2641 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2642 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2643 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2645 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2649 talloc_free(data.dptr);
2656 perform tickle updates if required
2658 static void ctdb_update_tcp_tickles(struct event_context *ev,
2659 struct timed_event *te,
2660 struct timeval t, void *private_data)
2662 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2664 struct ctdb_vnn *vnn;
2666 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2667 /* we only send out updates for public addresses that
2670 if (ctdb->pnn != vnn->pnn) {
2673 /* We only send out the updates if we need to */
2674 if (!vnn->tcp_update_needed) {
2677 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2679 CTDB_BROADCAST_CONNECTED,
2680 &vnn->public_address,
2683 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2684 ctdb_addr_to_str(&vnn->public_address)));
2688 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2689 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2690 ctdb_update_tcp_tickles, ctdb);
2695 start periodic update of tcp tickles
2697 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2699 ctdb->tickle_update_context = talloc_new(ctdb);
2701 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2702 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2703 ctdb_update_tcp_tickles, ctdb);
2709 struct control_gratious_arp {
2710 struct ctdb_context *ctdb;
2711 ctdb_sock_addr addr;
2717 send a control_gratuitous arp
2719 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2720 struct timeval t, void *private_data)
2723 struct control_gratious_arp *arp = talloc_get_type(private_data,
2724 struct control_gratious_arp);
2726 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2728 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2729 arp->iface, strerror(errno)));
2734 if (arp->count == CTDB_ARP_REPEAT) {
2739 event_add_timed(arp->ctdb->ev, arp,
2740 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2741 send_gratious_arp, arp);
2748 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2750 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2751 struct control_gratious_arp *arp;
2753 /* verify the size of indata */
2754 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2755 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2756 (unsigned)indata.dsize,
2757 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2761 ( offsetof(struct ctdb_control_gratious_arp, iface)
2762 + gratious_arp->len ) ){
2764 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2765 "but should be %u bytes\n",
2766 (unsigned)indata.dsize,
2767 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2772 arp = talloc(ctdb, struct control_gratious_arp);
2773 CTDB_NO_MEMORY(ctdb, arp);
2776 arp->addr = gratious_arp->addr;
2777 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2778 CTDB_NO_MEMORY(ctdb, arp->iface);
2781 event_add_timed(arp->ctdb->ev, arp,
2782 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address described by the control payload.
 *
 * ctdb   - daemon context, forwarded to ctdb_add_public_address().
 * indata - control payload, interpreted as a struct ctdb_control_ip_iface.
 *
 * The payload is rejected with a DEBUG_ERR message when it is shorter than
 * offsetof(struct ctdb_control_ip_iface, iface). A second diagnostic
 * reports offsetof(..., iface) + pub->len as the size the payload
 * "should be". The address, mask and interface name from the payload are
 * then passed to ctdb_add_public_address(), and a failure of that call is
 * logged.
 *
 * NOTE(review): the declaration of ret, the comparison guarding the
 * "Wrong size" message, the test applied to ret and all return statements
 * are not visible in this excerpt - confirm against the full source.
 */
2787 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2789 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2792 /* verify the size of indata */
2793 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2794 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2798 ( offsetof(struct ctdb_control_ip_iface, iface)
2801 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2802 "but should be %u bytes\n",
2803 (unsigned)indata.dsize,
2804 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
/* The interface name is passed as a pointer to the first byte of the
   payload's trailing iface field. */
2808 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2811 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2819 called when releaseip event finishes for del_public_address
/*
 * Completion callback used by ctdb_control_del_public_address(): frees the
 * talloc object handed in as private_data.
 *
 * NOTE(review): the remainder of the parameter list is not visible in this
 * excerpt. ctdb_control_del_public_address() passes its temporary talloc
 * context (mem_ctx) together with this callback, so private_data is
 * presumably that context - confirm against the full source.
 */
2821 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2824 talloc_free(private_data);
/*
 * Control handler: remove the public address named in the control payload.
 *
 * ctdb   - daemon context whose vnn list is searched.
 * indata - control payload, interpreted as a struct ctdb_control_ip_iface.
 *
 * The payload is rejected with a DEBUG_ERR message when it is shorter than
 * offsetof(struct ctdb_control_ip_iface, iface). A second diagnostic
 * reports offsetof(..., iface) + pub->len as the size the payload
 * "should be". The ctdb->vnn list is then walked; for the entry whose
 * public_address matches pub->addr (ctdb_same_ip) the entry is unlinked
 * from the list, a CTDB_EVENT_RELEASE_IP event script call is issued with
 * the entry's interface name, address string and netmask bits, and the
 * entry's interface is unassigned.
 *
 * NOTE(review): the declaration of ret, the comparison guarding the
 * "Wrong size" message, the body of the "vnn->iface == NULL" branch, the
 * handling of ret, any freeing of vnn and all return statements are not
 * visible in this excerpt - confirm against the full source.
 */
2827 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2829 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2830 struct ctdb_vnn *vnn;
2833 /* verify the size of indata */
2834 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2835 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2839 ( offsetof(struct ctdb_control_ip_iface, iface)
2842 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2843 "but should be %u bytes\n",
2844 (unsigned)indata.dsize,
2845 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2849 /* walk over all public addresses until we find a match */
2850 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2851 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2852 TALLOC_CTX *mem_ctx;
/* Unlink the entry from the list of public addresses first. */
2854 DLIST_REMOVE(ctdb->vnn, vnn);
2855 if (vnn->iface == NULL) {
/* Temporary context, parented to ctdb; it is passed to the event script
   call both as a context argument and alongside delete_ip_callback,
   which frees its private_data. */
2860 mem_ctx = talloc_new(ctdb);
2861 ret = ctdb_event_script_callback(ctdb,
2862 mem_ctx, delete_ip_callback, mem_ctx,
2864 CTDB_EVENT_RELEASE_IP,
2866 ctdb_vnn_iface_string(vnn),
2867 ctdb_addr_to_str(&vnn->public_address),
2868 vnn->public_netmask_bits);
/* The interface is unassigned only after the interface name has been
   handed to the event script call above. */
2869 ctdb_vnn_unassign_iface(ctdb, vnn);
2881 /* This function is called from the recovery daemon to verify that a remote
2882 node has the expected ip allocation.
2883 This is verified against ctdb->ip_tree
/*
 * Compare the public IP assignments reported in ips with the assignments
 * recorded in ctdb->ip_tree.
 *
 * ctdb - context holding the ip_tree to compare against.
 * ips  - list of ips->num entries, each with an address and a pnn.
 *
 * Visible behaviour:
 *  - when ctdb->ip_tree is NULL the expected allocation is not known yet
 *    and the per-address checks below are not reached in that case
 *    (NOTE(review): the value returned is not visible in this excerpt);
 *  - each address is looked up in ctdb->ip_tree with
 *    trbt_lookuparray32(), keyed by ip_key() of the address;
 *  - an address missing from the tree is logged at DEBUG_ERR
 *    ("reassign ips");
 *  - entries where either side's pnn is -1 get separate handling
 *    (NOTE(review): the action taken is not visible - presumably the
 *    entry is skipped; confirm);
 *  - a pnn mismatch between the tree and the reported entry is logged at
 *    DEBUG_ERR as an inconsistent allocation.
 *
 * NOTE(review): the declaration of i and every return statement are not
 * visible in this excerpt - confirm the return convention against the
 * full source.
 */
2885 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2887 struct ctdb_public_ip_list *tmp_ip;
2890 if (ctdb->ip_tree == NULL) {
2891 /* dont know the expected allocation yet, assume remote node
2900 for (i=0; i<ips->num; i++) {
2901 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2902 if (tmp_ip == NULL) {
2903 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2907 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2911 if (tmp_ip->pnn != ips->ips[i].pnn) {
2912 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record in ctdb->ip_tree that the address ip->addr is now assigned to
 * node ip->pnn.
 *
 * ctdb - context holding the ip_tree to update.
 * ip   - address and the node number to store for it.
 *
 * Visible behaviour:
 *  - when ctdb->ip_tree is NULL a DEBUG_ERR message is logged and the
 *    update is not performed;
 *  - the address is looked up with trbt_lookuparray32(), keyed by
 *    ip_key() of the address; a missing record is logged at DEBUG_ERR;
 *  - otherwise the change is logged at DEBUG_NOTICE (old and new pnn) and
 *    the record's pnn is overwritten with ip->pnn.
 *
 * NOTE(review): the return statements, and anything following the
 * assignment, are not visible in this excerpt - confirm the return
 * convention against the full source.
 */
2920 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2922 struct ctdb_public_ip_list *tmp_ip;
2924 if (ctdb->ip_tree == NULL) {
2925 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2929 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2930 if (tmp_ip == NULL) {
2931 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2935 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2936 tmp_ip->pnn = ip->pnn;