4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
/*
  Register the interface named 'iface' in the ctdb->ifaces list.
  NOTE(review): braces and return statements are not part of this listing,
  so what happens when the name is already present is not visible here.
 */
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Check whether an interface with this name is already in ctdb->ifaces */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new, zeroed structure for this interface (talloc child of ctdb);
   the copy of the name is allocated as a talloc child of the new entry */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
/* link the new entry into the list of known interfaces */
70 DLIST_ADD(ctdb->ifaces, i);
/*
  Look up an interface by name in the ctdb->ifaces list.
  NOTE(review): the second parameter line and the return statements are not
  part of this listing; 'iface' is the name compared against each entry.
 */
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Walk ctdb->ifaces looking for an entry whose name equals iface */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
339 if (status == -ETIME) {
342 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
343 ctdb_addr_to_str(&state->vnn->public_address),
344 ctdb_vnn_iface_string(state->vnn)));
345 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
347 node->flags |= NODE_FLAGS_UNHEALTHY;
352 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
354 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
359 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
360 data.dsize = strlen((char *)data.dptr) + 1;
361 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
363 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
366 /* the control succeeded */
367 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
373 take over an ip address
375 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
376 struct ctdb_req_control *c,
377 struct ctdb_vnn *vnn)
380 struct ctdb_do_takeip_state *state;
382 ret = ctdb_vnn_assign_iface(ctdb, vnn);
384 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
385 "assin a usable interface\n",
386 ctdb_addr_to_str(&vnn->public_address),
387 vnn->public_netmask_bits));
391 state = talloc(vnn, struct ctdb_do_takeip_state);
392 CTDB_NO_MEMORY(ctdb, state);
394 state->c = talloc_steal(ctdb, c);
397 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
398 ctdb_addr_to_str(&vnn->public_address),
399 vnn->public_netmask_bits,
400 ctdb_vnn_iface_string(vnn)));
402 ret = ctdb_event_script_callback(ctdb,
404 ctdb_do_takeip_callback,
409 ctdb_vnn_iface_string(vnn),
410 ctdb_addr_to_str(&vnn->public_address),
411 vnn->public_netmask_bits);
414 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
415 ctdb_addr_to_str(&vnn->public_address),
416 ctdb_vnn_iface_string(vnn)));
424 struct ctdb_do_updateip_state {
425 struct ctdb_req_control *c;
426 struct ctdb_iface *old;
427 struct ctdb_vnn *vnn;
431 called when updateip event finishes
433 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
436 struct ctdb_do_updateip_state *state =
437 talloc_get_type(private_data, struct ctdb_do_updateip_state);
441 if (status == -ETIME) {
444 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
445 ctdb_addr_to_str(&state->vnn->public_address),
447 ctdb_vnn_iface_string(state->vnn)));
450 * All we can do is reset the old interface
451 * and let the next run fix it
453 ctdb_vnn_unassign_iface(ctdb, state->vnn);
454 state->vnn->iface = state->old;
455 state->vnn->iface->references++;
457 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
462 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
464 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
469 /* the control succeeded */
470 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
476 update (move) an ip address
478 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
479 struct ctdb_req_control *c,
480 struct ctdb_vnn *vnn)
483 struct ctdb_do_updateip_state *state;
484 struct ctdb_iface *old = vnn->iface;
487 ctdb_vnn_unassign_iface(ctdb, vnn);
488 ret = ctdb_vnn_assign_iface(ctdb, vnn);
490 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
491 "assin a usable interface (old iface '%s')\n",
492 ctdb_addr_to_str(&vnn->public_address),
493 vnn->public_netmask_bits,
498 new_name = ctdb_vnn_iface_string(vnn);
499 if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
500 /* A benign update from one interface onto itself.
501 * no need to run the eventscripts in this case, just return
504 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
508 state = talloc(vnn, struct ctdb_do_updateip_state);
509 CTDB_NO_MEMORY(ctdb, state);
511 state->c = talloc_steal(ctdb, c);
515 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
516 "interface %s to %s\n",
517 ctdb_addr_to_str(&vnn->public_address),
518 vnn->public_netmask_bits,
522 ret = ctdb_event_script_callback(ctdb,
524 ctdb_do_updateip_callback,
527 CTDB_EVENT_UPDATE_IP,
531 ctdb_addr_to_str(&vnn->public_address),
532 vnn->public_netmask_bits);
534 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
535 ctdb_addr_to_str(&vnn->public_address),
536 old->name, new_name));
545 Find the vnn entry that holds the given public ip address.
546 The function returns a pointer, so an unknown address cannot yield -1; presumably NULL is returned (TODO confirm).
548 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
550 struct ctdb_vnn *vnn;
552 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
553 if (ctdb_same_ip(&vnn->public_address, addr)) {
562 take over an ip address
564 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
565 struct ctdb_req_control *c,
570 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
571 struct ctdb_vnn *vnn;
572 bool have_ip = false;
573 bool do_updateip = false;
574 bool do_takeip = false;
575 struct ctdb_iface *best_iface = NULL;
577 if (pip->pnn != ctdb->pnn) {
578 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
579 "with pnn %d, but we're node %d\n",
580 ctdb_addr_to_str(&pip->addr),
581 pip->pnn, ctdb->pnn));
585 /* look up the vnn for this address in our vnn list */
586 vnn = find_public_ip_vnn(ctdb, &pip->addr);
588 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
589 ctdb_addr_to_str(&pip->addr)));
593 have_ip = ctdb_sys_have_ip(&pip->addr);
594 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
595 if (best_iface == NULL) {
596 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
597 "a usable interface (old %s, have_ip %d)\n",
598 ctdb_addr_to_str(&vnn->public_address),
599 vnn->public_netmask_bits,
600 ctdb_vnn_iface_string(vnn),
605 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
606 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
610 if (vnn->iface == NULL && have_ip) {
611 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
612 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
613 ctdb_addr_to_str(&vnn->public_address)));
617 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
618 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
619 "and we have it on iface[%s], but it was assigned to node %d"
620 "and we are node %d, banning ourself\n",
621 ctdb_addr_to_str(&vnn->public_address),
622 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
627 if (vnn->pnn == -1 && have_ip) {
628 vnn->pnn = ctdb->pnn;
629 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
630 "and we already have it on iface[%s], update local daemon\n",
631 ctdb_addr_to_str(&vnn->public_address),
632 ctdb_vnn_iface_string(vnn)));
637 if (vnn->iface->link_up) {
638 /* only move when the rebalance gains something */
639 if (vnn->iface->references > (best_iface->references + 1)) {
642 } else if (vnn->iface != best_iface) {
649 ctdb_vnn_unassign_iface(ctdb, vnn);
656 ret = ctdb_do_takeip(ctdb, c, vnn);
660 } else if (do_updateip) {
661 ret = ctdb_do_updateip(ctdb, c, vnn);
667 * The interface is up and the kernel knows the ip
670 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
671 ctdb_addr_to_str(&pip->addr),
672 vnn->public_netmask_bits,
673 ctdb_vnn_iface_string(vnn)));
677 /* tell ctdb_control.c that we will be replying asynchronously */
684 takeover an ip address old v4 style
686 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
687 struct ctdb_req_control *c,
693 data.dsize = sizeof(struct ctdb_public_ip);
694 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
695 CTDB_NO_MEMORY(ctdb, data.dptr);
697 memcpy(data.dptr, indata.dptr, indata.dsize);
698 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
702 kill any clients that are registered with an IP that is being released
704 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
706 struct ctdb_client_ip *ip;
708 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
709 ctdb_addr_to_str(addr)));
711 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
712 ctdb_sock_addr tmp_addr;
715 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
717 ctdb_addr_to_str(&ip->addr)));
719 if (ctdb_same_ip(&tmp_addr, addr)) {
720 struct ctdb_client *client = ctdb_reqid_find(ctdb,
723 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
725 ctdb_addr_to_str(&ip->addr),
728 if (client->pid != 0) {
729 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
730 (unsigned)client->pid,
731 ctdb_addr_to_str(addr),
733 kill(client->pid, SIGKILL);
740 called when releaseip event finishes
742 static void release_ip_callback(struct ctdb_context *ctdb, int status,
745 struct takeover_callback_state *state =
746 talloc_get_type(private_data, struct takeover_callback_state);
749 if (status == -ETIME) {
753 /* send a message to all clients of this node telling them
754 that the cluster has been reconfigured and they should
755 release any sockets on this IP */
756 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
757 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
758 data.dsize = strlen((char *)data.dptr)+1;
760 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
762 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
764 /* kill clients that have registered with this IP */
765 release_kill_clients(ctdb, state->addr);
767 ctdb_vnn_unassign_iface(ctdb, state->vnn);
769 /* the control succeeded */
770 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
775 release an ip address
777 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
778 struct ctdb_req_control *c,
783 struct takeover_callback_state *state;
784 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
785 struct ctdb_vnn *vnn;
787 /* look up the vnn for this address in our vnn list */
788 vnn = find_public_ip_vnn(ctdb, &pip->addr);
790 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
791 ctdb_addr_to_str(&pip->addr)));
796 /* stop any previous arps */
797 talloc_free(vnn->takeover_ctx);
798 vnn->takeover_ctx = NULL;
800 if (!ctdb_sys_have_ip(&pip->addr)) {
801 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
802 ctdb_addr_to_str(&pip->addr),
803 vnn->public_netmask_bits,
804 ctdb_vnn_iface_string(vnn)));
805 ctdb_vnn_unassign_iface(ctdb, vnn);
809 if (vnn->iface == NULL) {
810 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
811 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
812 ctdb_addr_to_str(&vnn->public_address)));
816 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
817 ctdb_addr_to_str(&pip->addr),
818 vnn->public_netmask_bits,
819 ctdb_vnn_iface_string(vnn),
822 state = talloc(ctdb, struct takeover_callback_state);
823 CTDB_NO_MEMORY(ctdb, state);
825 state->c = talloc_steal(state, c);
826 state->addr = talloc(state, ctdb_sock_addr);
827 CTDB_NO_MEMORY(ctdb, state->addr);
828 *state->addr = pip->addr;
831 ret = ctdb_event_script_callback(ctdb,
832 state, release_ip_callback, state,
834 CTDB_EVENT_RELEASE_IP,
836 ctdb_vnn_iface_string(vnn),
837 ctdb_addr_to_str(&pip->addr),
838 vnn->public_netmask_bits);
840 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
841 ctdb_addr_to_str(&pip->addr),
842 ctdb_vnn_iface_string(vnn)));
847 /* tell the control that we will be replying asynchronously */
853 release an ip address old v4 style
855 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
856 struct ctdb_req_control *c,
862 data.dsize = sizeof(struct ctdb_public_ip);
863 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
864 CTDB_NO_MEMORY(ctdb, data.dptr);
866 memcpy(data.dptr, indata.dptr, indata.dsize);
867 return ctdb_control_release_ip(ctdb, c, data, async_reply);
871 static int ctdb_add_public_address(struct ctdb_context *ctdb,
872 ctdb_sock_addr *addr,
873 unsigned mask, const char *ifaces)
875 struct ctdb_vnn *vnn;
882 /* Verify that we don't have an entry for this ip yet */
883 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
884 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
885 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
886 ctdb_addr_to_str(addr)));
891 /* create a new vnn structure for this ip address */
892 vnn = talloc_zero(ctdb, struct ctdb_vnn);
893 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
894 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
895 tmp = talloc_strdup(vnn, ifaces);
896 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
897 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
898 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
899 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
900 vnn->ifaces[num] = talloc_strdup(vnn, iface);
901 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
905 vnn->ifaces[num] = NULL;
906 vnn->public_address = *addr;
907 vnn->public_netmask_bits = mask;
909 if (ctdb_sys_have_ip(addr)) {
910 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
911 vnn->pnn = ctdb->pnn;
914 for (i=0; vnn->ifaces[i]; i++) {
915 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
917 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
918 "for public_address[%s]\n",
919 vnn->ifaces[i], ctdb_addr_to_str(addr)));
924 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
928 DLIST_ADD(ctdb->vnn, vnn);
934 setup the event script directory
936 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
938 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
939 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
944 setup the public address lists from a file
946 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
952 lines = file_lines_load(alist, &nlines, ctdb);
954 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
957 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
961 for (i=0;i<nlines;i++) {
969 while ((*line == ' ') || (*line == '\t')) {
975 if (strcmp(line, "") == 0) {
978 tok = strtok(line, " \t");
980 tok = strtok(NULL, " \t");
982 if (NULL == ctdb->default_public_interface) {
983 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
988 ifaces = ctdb->default_public_interface;
993 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
994 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
998 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
999 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1009 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1013 struct ctdb_vnn *svnn;
1014 struct ctdb_iface *cur = NULL;
1018 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1019 CTDB_NO_MEMORY(ctdb, svnn);
1021 svnn->ifaces = talloc_array(svnn, const char *, 2);
1022 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1023 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1024 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1025 svnn->ifaces[1] = NULL;
1027 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1033 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1035 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1036 "for single_ip[%s]\n",
1038 ctdb_addr_to_str(&svnn->public_address)));
1043 /* assume the single public ip interface is initially "good" */
1044 cur = ctdb_find_iface(ctdb, iface);
1046 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1049 cur->link_up = true;
1051 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1057 ctdb->single_ip_vnn = svnn;
1061 struct ctdb_public_ip_list {
1062 struct ctdb_public_ip_list *next;
1064 ctdb_sock_addr addr;
1068 /* Given a physical node, return the number of
1069 public addresses that is currently assigned to this node.
1071 static int node_ip_coverage(struct ctdb_context *ctdb,
1073 struct ctdb_public_ip_list *ips)
1077 for (;ips;ips=ips->next) {
1078 if (ips->pnn == pnn) {
1086 /* Check if this is a public ip known to the node, i.e. can that
1087 node takeover this ip ?
1089 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1090 struct ctdb_public_ip_list *ip)
1092 struct ctdb_all_public_ips *public_ips;
1095 public_ips = ctdb->nodes[pnn]->available_public_ips;
1097 if (public_ips == NULL) {
1101 for (i=0;i<public_ips->num;i++) {
1102 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1103 /* yes, this node can serve this public ip */
1112 /* search the node list for a node to take over this ip.
1113 pick the node that is currently serving the fewest ips
1114 so that the ips get spread out evenly.
1116 static int find_takeover_node(struct ctdb_context *ctdb,
1117 struct ctdb_node_map *nodemap, uint32_t mask,
1118 struct ctdb_public_ip_list *ip,
1119 struct ctdb_public_ip_list *all_ips)
1121 int pnn, min=0, num;
1125 for (i=0;i<nodemap->num;i++) {
1126 if (nodemap->nodes[i].flags & mask) {
1127 /* This node is not healthy and cannot be used to serve
1133 /* verify that this node can serve this ip */
1134 if (can_node_serve_ip(ctdb, i, ip)) {
1135 /* non-zero return: this node cannot serve the ip, so skip to the next node */
1139 num = node_ip_coverage(ctdb, i, all_ips);
1140 /* was this the first node we checked ? */
1152 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1153 ctdb_addr_to_str(&ip->addr)));
/*
  Build the lookup key for an address: an array of IP_KEYLEN 32-bit words.
  The key is kept in a function-static buffer that is zeroed and refilled on
  every call. Addresses read through ip->ip fill key[3] only; addresses read
  through ip->ip6 fill key[0]..key[3].
  NOTE(review): the case labels and the return statement are not part of this
  listing; presumably a pointer to the static key is returned, in which case
  a result is overwritten by the next ip_key() call - confirm.
 */
1163 static uint32_t *ip_key(ctdb_sock_addr *ip)
1165 static uint32_t key[IP_KEYLEN];
/* start every call from an all-zero key */
1167 bzero(key, sizeof(key));
1169 switch (ip->sa.sa_family) {
1171 key[3] = htonl(ip->ip.sin_addr.s_addr);
1174 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1175 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1176 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1177 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
/* any other address family is only logged here */
1180 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1187 static void *add_ip_callback(void *parm, void *data)
1189 struct ctdb_public_ip_list *this_ip = parm;
1190 struct ctdb_public_ip_list *prev_ip = data;
1192 if (prev_ip == NULL) {
1195 if (this_ip->pnn == -1) {
1196 this_ip->pnn = prev_ip->pnn;
1202 void getips_count_callback(void *param, void *data)
1204 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1205 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1207 new_ip->next = *ip_list;
1211 static struct ctdb_public_ip_list *
1212 create_merged_ip_list(struct ctdb_context *ctdb)
1215 struct ctdb_public_ip_list *ip_list;
1216 struct ctdb_all_public_ips *public_ips;
1218 if (ctdb->ip_tree != NULL) {
1219 talloc_free(ctdb->ip_tree);
1220 ctdb->ip_tree = NULL;
1222 ctdb->ip_tree = trbt_create(ctdb, 0);
1224 for (i=0;i<ctdb->num_nodes;i++) {
1225 public_ips = ctdb->nodes[i]->known_public_ips;
1227 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1231 /* there were no public ips for this node */
1232 if (public_ips == NULL) {
1236 for (j=0;j<public_ips->num;j++) {
1237 struct ctdb_public_ip_list *tmp_ip;
1239 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1240 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1241 tmp_ip->pnn = public_ips->ips[j].pnn;
1242 tmp_ip->addr = public_ips->ips[j].addr;
1243 tmp_ip->next = NULL;
1245 trbt_insertarray32_callback(ctdb->ip_tree,
1246 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1253 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1259 make any IP alias changes for public addresses that are necessary
1261 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1263 int i, num_healthy, retries, num_ips;
1264 struct ctdb_public_ip ip;
1265 struct ctdb_public_ipv4 ipv4;
1266 uint32_t mask, *nodes;
1267 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1268 int maxnode, maxnum=0, minnode, minnum=0, num;
1270 struct timeval timeout;
1271 struct client_async_data *async_data;
1272 struct ctdb_client_control_state *state;
1273 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1276 * ip failover is completely disabled, just send out the
1277 * ipreallocated event.
1279 if (ctdb->tunable.disable_ip_failover != 0) {
1285 /* Count how many completely healthy nodes we have */
1287 for (i=0;i<nodemap->num;i++) {
1288 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1293 if (num_healthy > 0) {
1294 /* We have healthy nodes, so only consider them for
1295 serving public addresses
1297 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1299 /* We didn't have any completely healthy nodes so
1300 use "disabled" nodes as a fallback
1302 mask = NODE_FLAGS_INACTIVE;
1305 /* since nodes only know about those public addresses that
1306 can be served by that particular node, no single node has
1307 a full list of all public addresses that exist in the cluster.
1308 Walk over all node structures and create a merged list of
1309 all public addresses that exist in the cluster.
1311 keep the tree of ips around as ctdb->ip_tree
1313 all_ips = create_merged_ip_list(ctdb);
1315 /* Count how many ips we have */
1317 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1321 /* If we want deterministic ip allocations, i.e. that the ip addresses
1322 will always be allocated the same way for a specific set of
1323 available/unavailable nodes.
1325 if (1 == ctdb->tunable.deterministic_public_ips) {
1326 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1327 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1328 tmp_ip->pnn = i%nodemap->num;
1333 /* mark all public addresses with a masked node as being served by
1336 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1337 if (tmp_ip->pnn == -1) {
1340 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1345 /* verify that the assigned nodes can serve that public ip
1346 and set it to -1 if not
1348 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1349 if (tmp_ip->pnn == -1) {
1352 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1353 /* this node can not serve this ip. */
1359 /* now we must redistribute all public addresses with takeover node
1360 -1 among the nodes available
1364 /* loop over all ip's and find a physical node to cover for
1367 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1368 if (tmp_ip->pnn == -1) {
1369 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1370 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1371 ctdb_addr_to_str(&tmp_ip->addr)));
1376 /* If we don't want ips to fail back after a node becomes healthy
1377 again, we won't even try to reallocate the ip addresses so that
1378 they are evenly spread out.
1379 This can NOT be used at the same time as DeterministicIPs !
1381 if (1 == ctdb->tunable.no_ip_failback) {
1382 if (1 == ctdb->tunable.deterministic_public_ips) {
1383 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1389 /* now, try to make sure the ip addresses are evenly distributed
1391 for each ip address, loop over all nodes that can serve this
1392 ip and make sure that the difference between the node
1393 serving the most and the node serving the least ip's are not greater
1396 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1397 if (tmp_ip->pnn == -1) {
1401 /* Get the highest and lowest number of ips served by any
1402 valid node which can serve this ip.
1406 for (i=0;i<nodemap->num;i++) {
1407 if (nodemap->nodes[i].flags & mask) {
1411 /* only check nodes that can actually serve this ip */
1412 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1413 /* non-zero return: this node cannot serve the ip, so skip to the next node */
1417 num = node_ip_coverage(ctdb, i, all_ips);
1418 if (maxnode == -1) {
1427 if (minnode == -1) {
1437 if (maxnode == -1) {
1438 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1439 ctdb_addr_to_str(&tmp_ip->addr)));
1444 /* If we want deterministic IPs then dont try to reallocate
1445 them to spread out the load.
1447 if (1 == ctdb->tunable.deterministic_public_ips) {
1451 /* if the spread between the smallest and largest coverage by
1452 a node is >=2 we steal one of the ips from the node with
1453 most coverage to even things out a bit.
1454 try to do this a limited number of times since we dont
1455 want to spend too much time balancing the ip coverage.
1457 if ( (maxnum > minnum+1)
1458 && (retries < (num_ips + 5)) ){
1459 struct ctdb_public_ip_list *tmp;
1461 /* mark one of maxnode's vnn's as unassigned and try
1464 for (tmp=all_ips;tmp;tmp=tmp->next) {
1465 if (tmp->pnn == maxnode) {
1475 /* finished distributing the public addresses, now just send the
1476 info out to the nodes
1480 /* at this point ->pnn is the node which will own each IP
1481 or -1 if there is no node that can cover this ip
1484 /* now tell all nodes to delete any alias that they should not
1485 have. This will be a NOOP on nodes that don't currently
1486 hold the given alias */
1487 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1490 for (i=0;i<nodemap->num;i++) {
1491 /* don't talk to unconnected nodes, but do talk to banned nodes */
1492 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1496 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1497 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1498 /* This node should be serving this
1499 vnn so dont tell it to release the ip
1503 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1504 ipv4.pnn = tmp_ip->pnn;
1505 ipv4.sin = tmp_ip->addr.ip;
1507 timeout = TAKEOVER_TIMEOUT();
1508 data.dsize = sizeof(ipv4);
1509 data.dptr = (uint8_t *)&ipv4;
1510 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1511 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1515 ip.pnn = tmp_ip->pnn;
1516 ip.addr = tmp_ip->addr;
1518 timeout = TAKEOVER_TIMEOUT();
1519 data.dsize = sizeof(ip);
1520 data.dptr = (uint8_t *)&ip;
1521 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1522 0, CTDB_CONTROL_RELEASE_IP, 0,
1527 if (state == NULL) {
1528 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1529 talloc_free(tmp_ctx);
1533 ctdb_client_async_add(async_data, state);
1536 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1537 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1538 talloc_free(tmp_ctx);
1541 talloc_free(async_data);
1544 /* tell all nodes to get their own IPs */
1545 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1546 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1547 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1548 if (tmp_ip->pnn == -1) {
1549 /* this IP won't be taken over */
1553 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1554 ipv4.pnn = tmp_ip->pnn;
1555 ipv4.sin = tmp_ip->addr.ip;
1557 timeout = TAKEOVER_TIMEOUT();
1558 data.dsize = sizeof(ipv4);
1559 data.dptr = (uint8_t *)&ipv4;
1560 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1561 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1565 ip.pnn = tmp_ip->pnn;
1566 ip.addr = tmp_ip->addr;
1568 timeout = TAKEOVER_TIMEOUT();
1569 data.dsize = sizeof(ip);
1570 data.dptr = (uint8_t *)&ip;
1571 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1572 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1576 if (state == NULL) {
1577 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1578 talloc_free(tmp_ctx);
1582 ctdb_client_async_add(async_data, state);
1584 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1585 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1586 talloc_free(tmp_ctx);
1591 /* tell all nodes to update natgw */
1592 /* send the flags update natgw on all connected nodes */
1593 data.dptr = discard_const("ipreallocated");
1594 data.dsize = strlen((char *)data.dptr) + 1;
1595 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1596 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1597 nodes, 0, TAKEOVER_TIMEOUT(),
1601 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1604 talloc_free(tmp_ctx);
1610 destroy a ctdb_client_ip structure
1612 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1614 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1615 ctdb_addr_to_str(&ip->addr),
1616 ntohs(ip->addr.ip.sin_port),
1619 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1624 called by a client to inform us of a TCP connection that it is managing
1625 that should be tickled with an ACK when IP takeover is done
1626 we handle both the old ipv4 style of packets as well as the new ipv4/6
1629 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1632 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1633 struct ctdb_control_tcp *old_addr = NULL;
1634 struct ctdb_control_tcp_addr new_addr;
1635 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1636 struct ctdb_tcp_list *tcp;
1637 struct ctdb_tcp_connection t;
1640 struct ctdb_client_ip *ip;
1641 struct ctdb_vnn *vnn;
1642 ctdb_sock_addr addr;
1644 switch (indata.dsize) {
1645 case sizeof(struct ctdb_control_tcp):
1646 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1647 ZERO_STRUCT(new_addr);
1648 tcp_sock = &new_addr;
1649 tcp_sock->src.ip = old_addr->src;
1650 tcp_sock->dest.ip = old_addr->dest;
1652 case sizeof(struct ctdb_control_tcp_addr):
1653 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1656 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1657 "to ctdb_control_tcp_client. size was %d but "
1658 "only allowed sizes are %lu and %lu\n",
1660 (long unsigned)sizeof(struct ctdb_control_tcp),
1661 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1665 addr = tcp_sock->src;
1666 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1667 addr = tcp_sock->dest;
1668 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1671 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1672 vnn = find_public_ip_vnn(ctdb, &addr);
1674 switch (addr.sa.sa_family) {
1676 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1677 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1678 ctdb_addr_to_str(&addr)));
1682 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1683 ctdb_addr_to_str(&addr)));
1686 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1692 if (vnn->pnn != ctdb->pnn) {
1693 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1694 ctdb_addr_to_str(&addr),
1695 client_id, client->pid));
1696 /* failing this call will tell smbd to die */
1700 ip = talloc(client, struct ctdb_client_ip);
1701 CTDB_NO_MEMORY(ctdb, ip);
1705 ip->client_id = client_id;
1706 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1707 DLIST_ADD(ctdb->client_ip_list, ip);
1709 tcp = talloc(client, struct ctdb_tcp_list);
1710 CTDB_NO_MEMORY(ctdb, tcp);
1712 tcp->connection.src_addr = tcp_sock->src;
1713 tcp->connection.dst_addr = tcp_sock->dest;
1715 DLIST_ADD(client->tcp_list, tcp);
1717 t.src_addr = tcp_sock->src;
1718 t.dst_addr = tcp_sock->dest;
1720 data.dptr = (uint8_t *)&t;
1721 data.dsize = sizeof(t);
1723 switch (addr.sa.sa_family) {
1725 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1726 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1727 ctdb_addr_to_str(&tcp_sock->src),
1728 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1731 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1732 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1733 ctdb_addr_to_str(&tcp_sock->src),
1734 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1737 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1741 /* tell all nodes about this tcp connection */
1742 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1743 CTDB_CONTROL_TCP_ADD,
1744 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1746 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1754 find a tcp address on a list
1756 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1757 struct ctdb_tcp_connection *tcp)
1761 if (array == NULL) {
1765 for (i=0;i<array->num;i++) {
1766 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1767 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1768 return &array->connections[i];
1777 called by a daemon to inform us of a TCP connection that one of its
1778 clients is managing that should be tickled with an ACK when IP takeover is
1781 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1783 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1784 struct ctdb_tcp_array *tcparray;
1785 struct ctdb_tcp_connection tcp;
1786 struct ctdb_vnn *vnn;
1788 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1790 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1791 ctdb_addr_to_str(&p->dst_addr)));
1797 tcparray = vnn->tcp_array;
1799 /* If this is the first tickle */
1800 if (tcparray == NULL) {
1801 tcparray = talloc_size(ctdb->nodes,
1802 offsetof(struct ctdb_tcp_array, connections) +
1803 sizeof(struct ctdb_tcp_connection) * 1);
1804 CTDB_NO_MEMORY(ctdb, tcparray);
1805 vnn->tcp_array = tcparray;
1808 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1809 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1811 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1812 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1815 if (tcp_update_needed) {
1816 vnn->tcp_update_needed = true;
1822 /* Do we already have this tickle ?*/
1823 tcp.src_addr = p->src_addr;
1824 tcp.dst_addr = p->dst_addr;
1825 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1826 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1827 ctdb_addr_to_str(&tcp.dst_addr),
1828 ntohs(tcp.dst_addr.ip.sin_port),
1833 /* A new tickle, we must add it to the array */
1834 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1835 struct ctdb_tcp_connection,
1837 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1839 vnn->tcp_array = tcparray;
1840 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1841 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1844 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1845 ctdb_addr_to_str(&tcp.dst_addr),
1846 ntohs(tcp.dst_addr.ip.sin_port),
1849 if (tcp_update_needed) {
1850 vnn->tcp_update_needed = true;
1858 called by a daemon to inform us of a TCP connection that one of its
1859 clients is managing that should be tickled with an ACK when IP takeover is
1862 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1864 struct ctdb_tcp_connection *tcpp;
1865 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1868 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1869 ctdb_addr_to_str(&conn->dst_addr)));
1873 /* if the array is empty we cant remove it
1874 and we dont need to do anything
1876 if (vnn->tcp_array == NULL) {
1877 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1878 ctdb_addr_to_str(&conn->dst_addr),
1879 ntohs(conn->dst_addr.ip.sin_port)));
1884 /* See if we know this connection
1885 if we dont know this connection then we dont need to do anything
1887 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1889 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1890 ctdb_addr_to_str(&conn->dst_addr),
1891 ntohs(conn->dst_addr.ip.sin_port)));
1896 /* We need to remove this entry from the array.
1897 Instead of allocating a new array and copying data to it
1898 we cheat and just copy the last entry in the existing array
1899 to the entry that is to be removed and just shrink the
1902 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1903 vnn->tcp_array->num--;
1905 /* If we deleted the last entry we also need to remove the entire array
1907 if (vnn->tcp_array->num == 0) {
1908 talloc_free(vnn->tcp_array);
1909 vnn->tcp_array = NULL;
1912 vnn->tcp_update_needed = true;
1914 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1915 ctdb_addr_to_str(&conn->src_addr),
1916 ntohs(conn->src_addr.ip.sin_port)));
1921 called by a daemon to inform us of a TCP connection that one of its
1922 clients used is no longer needed in the tickle database
1924 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1926 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1928 ctdb_remove_tcp_connection(ctdb, conn);
1935 called when a daemon restarts - send all tickles for all public addresses
1936 we are serving immediately to the new node.
1938 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1940 /*XXX here we should send all tickles we are serving to the new node */
1946 called when a client structure goes away - hook to remove
1947 elements from the tcp_list in all daemons
1949 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1951 while (client->tcp_list) {
1952 struct ctdb_tcp_list *tcp = client->tcp_list;
1953 DLIST_REMOVE(client->tcp_list, tcp);
1954 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1960 release all IPs on shutdown
1962 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1964 struct ctdb_vnn *vnn;
1966 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1967 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1968 ctdb_vnn_unassign_iface(ctdb, vnn);
1974 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1975 ctdb_vnn_iface_string(vnn),
1976 ctdb_addr_to_str(&vnn->public_address),
1977 vnn->public_netmask_bits);
1978 release_kill_clients(ctdb, &vnn->public_address);
1979 ctdb_vnn_unassign_iface(ctdb, vnn);
1985 get list of public IPs
1987 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1988 struct ctdb_req_control *c, TDB_DATA *outdata)
1991 struct ctdb_all_public_ips *ips;
1992 struct ctdb_vnn *vnn;
1993 bool only_available = false;
1995 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1996 only_available = true;
1999 /* count how many public ip structures we have */
2001 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2005 len = offsetof(struct ctdb_all_public_ips, ips) +
2006 num*sizeof(struct ctdb_public_ip);
2007 ips = talloc_zero_size(outdata, len);
2008 CTDB_NO_MEMORY(ctdb, ips);
2011 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2012 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2015 ips->ips[i].pnn = vnn->pnn;
2016 ips->ips[i].addr = vnn->public_address;
2020 len = offsetof(struct ctdb_all_public_ips, ips) +
2021 i*sizeof(struct ctdb_public_ip);
2023 outdata->dsize = len;
2024 outdata->dptr = (uint8_t *)ips;
2031 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2033 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2034 struct ctdb_req_control *c, TDB_DATA *outdata)
2037 struct ctdb_all_public_ipsv4 *ips;
2038 struct ctdb_vnn *vnn;
2040 /* count how many public ip structures we have */
2042 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2043 if (vnn->public_address.sa.sa_family != AF_INET) {
2049 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2050 num*sizeof(struct ctdb_public_ipv4);
2051 ips = talloc_zero_size(outdata, len);
2052 CTDB_NO_MEMORY(ctdb, ips);
2054 outdata->dsize = len;
2055 outdata->dptr = (uint8_t *)ips;
2059 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2060 if (vnn->public_address.sa.sa_family != AF_INET) {
2063 ips->ips[i].pnn = vnn->pnn;
2064 ips->ips[i].sin = vnn->public_address.ip;
2071 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2072 struct ctdb_req_control *c,
2077 ctdb_sock_addr *addr;
2078 struct ctdb_control_public_ip_info *info;
2079 struct ctdb_vnn *vnn;
2081 addr = (ctdb_sock_addr *)indata.dptr;
2083 vnn = find_public_ip_vnn(ctdb, addr);
2085 /* if it is not a public ip it could be our 'single ip' */
2086 if (ctdb->single_ip_vnn) {
2087 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2088 vnn = ctdb->single_ip_vnn;
2093 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2094 "'%s'not a public address\n",
2095 ctdb_addr_to_str(addr)));
2099 /* count how many interfaces are listed for this public ip */
2101 for (;vnn->ifaces[num];) {
2105 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2106 num*sizeof(struct ctdb_control_iface_info);
2107 info = talloc_zero_size(outdata, len);
2108 CTDB_NO_MEMORY(ctdb, info);
2110 info->ip.addr = vnn->public_address;
2111 info->ip.pnn = vnn->pnn;
2112 info->active_idx = 0xFFFFFFFF;
2114 for (i=0; vnn->ifaces[i]; i++) {
2115 struct ctdb_iface *cur;
2117 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2119 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2123 if (vnn->iface == cur) {
2124 info->active_idx = i;
2126 strcpy(info->ifaces[i].name, cur->name);
2127 info->ifaces[i].link_state = cur->link_up;
2128 info->ifaces[i].references = cur->references;
2131 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2132 i*sizeof(struct ctdb_control_iface_info);
2134 outdata->dsize = len;
2135 outdata->dptr = (uint8_t *)info;
2140 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2141 struct ctdb_req_control *c,
2145 struct ctdb_control_get_ifaces *ifaces;
2146 struct ctdb_iface *cur;
2148 /* count how many interfaces we have */
2150 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2154 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2155 num*sizeof(struct ctdb_control_iface_info);
2156 ifaces = talloc_zero_size(outdata, len);
2157 CTDB_NO_MEMORY(ctdb, ifaces);
2160 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2161 strcpy(ifaces->ifaces[i].name, cur->name);
2162 ifaces->ifaces[i].link_state = cur->link_up;
2163 ifaces->ifaces[i].references = cur->references;
2167 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2168 i*sizeof(struct ctdb_control_iface_info);
2170 outdata->dsize = len;
2171 outdata->dptr = (uint8_t *)ifaces;
2176 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2177 struct ctdb_req_control *c,
2180 struct ctdb_control_iface_info *info;
2181 struct ctdb_iface *iface;
2182 bool link_up = false;
2184 info = (struct ctdb_control_iface_info *)indata.dptr;
2186 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2187 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2188 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2189 len, len, info->name));
2193 switch (info->link_state) {
2201 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2202 (unsigned int)info->link_state));
2206 if (info->references != 0) {
2207 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2208 (unsigned int)info->references));
2212 iface = ctdb_find_iface(ctdb, info->name);
2213 if (iface == NULL) {
2214 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2219 if (link_up == iface->link_up) {
2223 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2224 ("iface[%s] has changed it's link status %s => %s\n",
2226 iface->link_up?"up":"down",
2227 link_up?"up":"down"));
2229 iface->link_up = link_up;
2235 structure containing the listening socket and the list of tcp connections
2236 that the ctdb daemon is to kill
2238 struct ctdb_kill_tcp {
2239 struct ctdb_vnn *vnn;
2240 struct ctdb_context *ctdb;
2242 struct fd_event *fde;
2243 trbt_tree_t *connections;
2248 a tcp connection that is to be killed
2250 struct ctdb_killtcp_con {
2251 ctdb_sock_addr src_addr;
2252 ctdb_sock_addr dst_addr;
2254 struct ctdb_kill_tcp *killtcp;
2257 /* this function is used to create a key to represent this socketpair
2258 in the killtcp tree.
2259 this key is used to insert and lookup matching socketpairs that are
2260 to be tickled and RST
2262 #define KILLTCP_KEYLEN 10
2263 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2265 static uint32_t key[KILLTCP_KEYLEN];
2267 bzero(key, sizeof(key));
2269 if (src->sa.sa_family != dst->sa.sa_family) {
2270 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2274 switch (src->sa.sa_family) {
2276 key[0] = dst->ip.sin_addr.s_addr;
2277 key[1] = src->ip.sin_addr.s_addr;
2278 key[2] = dst->ip.sin_port;
2279 key[3] = src->ip.sin_port;
2282 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2283 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2284 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2285 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2286 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2287 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2288 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2289 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2290 key[8] = dst->ip6.sin6_port;
2291 key[9] = src->ip6.sin6_port;
2294 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2302 called when we get a read event on the raw socket
2304 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2305 uint16_t flags, void *private_data)
2307 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2308 struct ctdb_killtcp_con *con;
2309 ctdb_sock_addr src, dst;
2310 uint32_t ack_seq, seq;
2312 if (!(flags & EVENT_FD_READ)) {
2316 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2317 killtcp->private_data,
2319 &ack_seq, &seq) != 0) {
2320 /* probably a non-tcp ACK packet */
2324 /* check if we have this guy in our list of connections
2327 con = trbt_lookuparray32(killtcp->connections,
2328 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2330 /* no this was some other packet we can just ignore */
2334 /* This one has been tickled !
2335 now reset him and remove him from the list.
2337 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2338 ntohs(con->dst_addr.ip.sin_port),
2339 ctdb_addr_to_str(&con->src_addr),
2340 ntohs(con->src_addr.ip.sin_port)));
2342 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2347 /* when traversing the list of all tcp connections to send tickle acks to
2348 (so that we can capture the ack coming back and kill the connection
2350 this callback is called for each connection we are currently trying to kill
2352 static void tickle_connection_traverse(void *param, void *data)
2354 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2356 /* have tried too many times, just give up */
2357 if (con->count >= 5) {
2358 /* can't delete in traverse: reparent to delete_cons */
2359 talloc_steal(param, con);
2363 /* otherwise, try tickling it again */
2366 (ctdb_sock_addr *)&con->dst_addr,
2367 (ctdb_sock_addr *)&con->src_addr,
2373 called every second until all sentenced connections have been reset
2375 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2376 struct timeval t, void *private_data)
2378 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2379 void *delete_cons = talloc_new(NULL);
2381 /* loop over all connections sending tickle ACKs */
2382 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2384 /* now we've finished traverse, it's safe to do deletion. */
2385 talloc_free(delete_cons);
2387 /* If there are no more connections to kill we can remove the
2388 entire killtcp structure
2390 if ( (killtcp->connections == NULL) ||
2391 (killtcp->connections->root == NULL) ) {
2392 talloc_free(killtcp);
2396 /* try tickling them again in a second's time
2398 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2399 ctdb_tickle_sentenced_connections, killtcp);
2403 destroy the killtcp structure
2405 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2408 killtcp->vnn->killtcp = NULL;
2414 /* nothing fancy here, just unconditionally replace any existing
2415 connection structure with the new one.
2417 dont even free the old one if it did exist, that one is talloc_stolen
2418 by the same node in the tree anyway and will be deleted when the new data
2421 static void *add_killtcp_callback(void *parm, void *data)
2427 add a tcp socket to the list of connections we want to RST
2429 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2433 ctdb_sock_addr src, dst;
2434 struct ctdb_kill_tcp *killtcp;
2435 struct ctdb_killtcp_con *con;
2436 struct ctdb_vnn *vnn;
2438 ctdb_canonicalize_ip(s, &src);
2439 ctdb_canonicalize_ip(d, &dst);
2441 vnn = find_public_ip_vnn(ctdb, &dst);
2443 vnn = find_public_ip_vnn(ctdb, &src);
2446 /* if it is not a public ip it could be our 'single ip' */
2447 if (ctdb->single_ip_vnn) {
2448 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2449 vnn = ctdb->single_ip_vnn;
2454 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2458 killtcp = vnn->killtcp;
2460 /* If this is the first connection to kill we must allocate
2463 if (killtcp == NULL) {
2464 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2465 CTDB_NO_MEMORY(ctdb, killtcp);
2468 killtcp->ctdb = ctdb;
2469 killtcp->capture_fd = -1;
2470 killtcp->connections = trbt_create(killtcp, 0);
2472 vnn->killtcp = killtcp;
2473 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2478 /* create a structure that describes this connection we want to
2479 RST and store it in killtcp->connections
2481 con = talloc(killtcp, struct ctdb_killtcp_con);
2482 CTDB_NO_MEMORY(ctdb, con);
2483 con->src_addr = src;
2484 con->dst_addr = dst;
2486 con->killtcp = killtcp;
2489 trbt_insertarray32_callback(killtcp->connections,
2490 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2491 add_killtcp_callback, con);
2494 If we dont have a socket to listen on yet we must create it
2496 if (killtcp->capture_fd == -1) {
2497 const char *iface = ctdb_vnn_iface_string(vnn);
2498 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2499 if (killtcp->capture_fd == -1) {
2500 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2501 "socket on iface '%s' for killtcp (%s)\n",
2502 iface, strerror(errno)));
2508 if (killtcp->fde == NULL) {
2509 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2511 capture_tcp_handler, killtcp);
2512 tevent_fd_set_auto_close(killtcp->fde);
2514 /* We also need to set up some events to tickle all these connections
2515 until they are all reset
2517 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2518 ctdb_tickle_sentenced_connections, killtcp);
2521 /* tickle him once now */
2530 talloc_free(vnn->killtcp);
2531 vnn->killtcp = NULL;
2536 kill a TCP connection.
2538 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2540 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2542 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2546 called by a daemon to inform us of the entire list of TCP tickles for
2547 a particular public address.
2548 this control should only be sent by the node that is currently serving
2549 that public address.
2551 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2553 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2554 struct ctdb_tcp_array *tcparray;
2555 struct ctdb_vnn *vnn;
2557 /* We must at least have tickles.num or else we cant verify the size
2558 of the received data blob
2560 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2561 tickles.connections)) {
2562 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2566 /* verify that the size of data matches what we expect */
2567 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2568 tickles.connections)
2569 + sizeof(struct ctdb_tcp_connection)
2570 * list->tickles.num) {
2571 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2575 vnn = find_public_ip_vnn(ctdb, &list->addr);
2577 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2578 ctdb_addr_to_str(&list->addr)));
2583 /* remove any old ticklelist we might have */
2584 talloc_free(vnn->tcp_array);
2585 vnn->tcp_array = NULL;
2587 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2588 CTDB_NO_MEMORY(ctdb, tcparray);
2590 tcparray->num = list->tickles.num;
2592 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2593 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2595 memcpy(tcparray->connections, &list->tickles.connections[0],
2596 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2598 /* We now have a new fresh tickle list array for this vnn */
2599 vnn->tcp_array = talloc_steal(vnn, tcparray);
2605 called to return the full list of tickles for the public address associated
2606 with the provided vnn
2608 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2610 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2611 struct ctdb_control_tcp_tickle_list *list;
2612 struct ctdb_tcp_array *tcparray;
2614 struct ctdb_vnn *vnn;
2616 vnn = find_public_ip_vnn(ctdb, addr);
2618 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2619 ctdb_addr_to_str(addr)));
2624 tcparray = vnn->tcp_array;
2626 num = tcparray->num;
2631 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2632 tickles.connections)
2633 + sizeof(struct ctdb_tcp_connection) * num;
2635 outdata->dptr = talloc_size(outdata, outdata->dsize);
2636 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2637 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2640 list->tickles.num = num;
2642 memcpy(&list->tickles.connections[0], tcparray->connections,
2643 sizeof(struct ctdb_tcp_connection) * num);
2651 set the list of all tcp tickles for a public address
2653 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2654 struct timeval timeout, uint32_t destnode,
2655 ctdb_sock_addr *addr,
2656 struct ctdb_tcp_array *tcparray)
2660 struct ctdb_control_tcp_tickle_list *list;
2663 num = tcparray->num;
2668 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2669 tickles.connections) +
2670 sizeof(struct ctdb_tcp_connection) * num;
2671 data.dptr = talloc_size(ctdb, data.dsize);
2672 CTDB_NO_MEMORY(ctdb, data.dptr);
2674 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2676 list->tickles.num = num;
2678 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2681 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2682 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2683 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2685 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2689 talloc_free(data.dptr);
2696 perform tickle updates if required
2698 static void ctdb_update_tcp_tickles(struct event_context *ev,
2699 struct timed_event *te,
2700 struct timeval t, void *private_data)
2702 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2704 struct ctdb_vnn *vnn;
2706 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2707 /* we only send out updates for public addresses that
2710 if (ctdb->pnn != vnn->pnn) {
2713 /* We only send out the updates if we need to */
2714 if (!vnn->tcp_update_needed) {
2717 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2719 CTDB_BROADCAST_CONNECTED,
2720 &vnn->public_address,
2723 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2724 ctdb_addr_to_str(&vnn->public_address)));
2728 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2729 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2730 ctdb_update_tcp_tickles, ctdb);
2735 start periodic update of tcp tickles
2737 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2739 ctdb->tickle_update_context = talloc_new(ctdb);
2741 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2742 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2743 ctdb_update_tcp_tickles, ctdb);
2749 struct control_gratious_arp {
2750 struct ctdb_context *ctdb;
2751 ctdb_sock_addr addr;
2757 send a control_gratuitous arp
2759 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2760 struct timeval t, void *private_data)
2763 struct control_gratious_arp *arp = talloc_get_type(private_data,
2764 struct control_gratious_arp);
2766 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2768 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2769 arp->iface, strerror(errno)));
2774 if (arp->count == CTDB_ARP_REPEAT) {
2779 event_add_timed(arp->ctdb->ev, arp,
2780 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2781 send_gratious_arp, arp);
/*
 * Control handler that starts a series of gratuitous ARP sends.
 *
 * indata.dptr is interpreted as a struct ctdb_control_gratious_arp.
 * The handler first requires indata.dsize to cover the fixed part of
 * the structure (everything before the iface member), then compares
 * indata.dsize with that fixed size plus gratious_arp->len and logs
 * a wrong-size error on mismatch.  It then allocates a struct
 * control_gratious_arp under ctdb, copies the address, duplicates the
 * interface name onto the new object, and schedules send_gratious_arp
 * with timeval_zero() as the time.
 *
 * NOTE(review): talloc() does not zero the allocation, and the lines
 * that set arp->ctdb and arp->count are not visible in this listing;
 * arp->ctdb must be set before the event_add_timed call, which
 * dereferences it -- confirm against the full file.
 *
 * NOTE(review): talloc_strdup() reads gratious_arp->iface as a
 * NUL-terminated string.  The visible checks compare sizes only; no
 * check that iface is terminated within indata is visible -- confirm.
 *
 * The return statements and the exact behaviour of CTDB_NO_MEMORY are
 * not visible here.
 */
2788 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2790 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2791 struct control_gratious_arp *arp;
2793 /* verify the size of indata */
2794 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2795 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2796 (unsigned)indata.dsize,
2797 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
/* second check: fixed part plus the declared length of iface */
2801 ( offsetof(struct ctdb_control_gratious_arp, iface)
2802 + gratious_arp->len ) ){
2804 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2805 "but should be %u bytes\n",
2806 (unsigned)indata.dsize,
2807 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
/* per-request state, parented to ctdb */
2812 arp = talloc(ctdb, struct control_gratious_arp);
2813 CTDB_NO_MEMORY(ctdb, arp);
2816 arp->addr = gratious_arp->addr;
/* private copy of the interface name, owned by arp */
2817 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2818 CTDB_NO_MEMORY(ctdb, arp->iface);
/* timeval_zero() is passed as the time of the first send */
2821 event_add_timed(arp->ctdb->ev, arp,
2822 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler that adds a public address.
 *
 * indata.dptr is interpreted as a struct ctdb_control_ip_iface.  The
 * handler requires indata.dsize to cover the fixed part of the
 * structure (everything before the iface member) and then performs a
 * second size check whose error message expects the fixed size plus
 * pub->len.  On success of the checks it calls
 * ctdb_add_public_address() with the address, mask and interface name
 * taken from the payload, and logs at DEBUG_ERR when that call fails.
 *
 * NOTE(review): this listing omits short lines, including the
 * declaration of ret, part of the second size comparison, the test on
 * ret and all return statements -- confirm against the full file.
 */
2827 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2829 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2832 /* verify the size of indata */
2833 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2834 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/* second check: fixed part plus the variable-length iface */
2838 ( offsetof(struct ctdb_control_ip_iface, iface)
2841 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2842 "but should be %u bytes\n",
2843 (unsigned)indata.dsize,
2844 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
/* hand the validated payload to the worker */
2848 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2851 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2859 called when releaseip event finishes for del_public_address
/*
 * Completion callback given to ctdb_event_script_callback() by
 * ctdb_control_del_public_address.  It frees private_data, which that
 * caller sets to the temporary mem_ctx it created for the event.
 *
 * ctdb and status are not referenced in the visible lines.
 *
 * NOTE(review): the rest of the parameter list is on a line that is not
 * visible in this listing.
 */
2861 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2864 talloc_free(private_data);
/*
 * Control handler that deletes a public address.
 *
 * indata.dptr is interpreted as a struct ctdb_control_ip_iface and is
 * size-checked in two steps: first against the fixed part of the
 * structure, then with a check whose error message expects the fixed
 * size plus pub->len.  The handler then walks ctdb->vnn looking for an
 * entry whose public_address matches pub->addr according to
 * ctdb_same_ip().  For the match it:
 *   - unlinks the vnn from ctdb->vnn,
 *   - tests whether the vnn has no interface (vnn->iface == NULL),
 *   - creates a temporary talloc context under ctdb and starts a
 *     CTDB_EVENT_RELEASE_IP event script, passing the interface name,
 *     the address string and the netmask bits, with delete_ip_callback
 *     freeing the temporary context,
 *   - calls ctdb_vnn_unassign_iface() on the vnn.
 *
 * NOTE(review): talloc_new() is not checked for NULL in the visible
 * lines.
 *
 * NOTE(review): this listing omits short lines, including the body of
 * the iface == NULL branch, some arguments of the event script call,
 * any freeing of vnn, the handling of ret and all return statements --
 * confirm against the full file.
 */
2867 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2869 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2870 struct ctdb_vnn *vnn;
2873 /* verify the size of indata */
2874 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2875 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/* second check: fixed part plus the variable-length iface */
2879 ( offsetof(struct ctdb_control_ip_iface, iface)
2882 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2883 "but should be %u bytes\n",
2884 (unsigned)indata.dsize,
2885 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2889 /* walk over all public addresses until we find a match */
2890 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2891 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2892 TALLOC_CTX *mem_ctx;
/* take the entry off the list before doing anything else with it */
2894 DLIST_REMOVE(ctdb->vnn, vnn);
/* no interface is attached to this vnn */
2895 if (vnn->iface == NULL) {
/* temporary context, released by delete_ip_callback */
2900 mem_ctx = talloc_new(ctdb);
2901 ret = ctdb_event_script_callback(ctdb,
2902 mem_ctx, delete_ip_callback, mem_ctx,
2904 CTDB_EVENT_RELEASE_IP,
2906 ctdb_vnn_iface_string(vnn),
2907 ctdb_addr_to_str(&vnn->public_address),
2908 vnn->public_netmask_bits);
/* runs right after the event script call has returned */
2909 ctdb_vnn_unassign_iface(ctdb, vnn);
2921 /* This function is called from the recovery daemon to verify that a remote
2922 node has the expected ip allocation.
2923 This is verified against ctdb->ip_tree
/*
 * Compare the ip list reported by a remote node with ctdb->ip_tree.
 *
 * When ctdb->ip_tree is NULL the expected allocation is not known yet
 * and the function takes an early branch whose body is not visible in
 * this listing.  Otherwise each of the ips->num entries is looked up in
 * ctdb->ip_tree with trbt_lookuparray32(), keyed by ip_key() of the
 * entry address:
 *   - a missing record is logged at DEBUG_ERR,
 *   - an entry where either the stored pnn or the reported pnn is -1
 *     takes a separate branch,
 *   - a differing pnn is logged at DEBUG_ERR as an inconsistent
 *     allocation.
 *
 * NOTE(review): the declaration of i, the branch bodies and every
 * return statement are on lines that are not visible here, so the
 * return values cannot be stated from this listing.  The closing
 * delimiter of the comment inside the NULL check is also not visible.
 */
2925 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2927 struct ctdb_public_ip_list *tmp_ip;
2930 if (ctdb->ip_tree == NULL) {
2931 /* dont know the expected allocation yet, assume remote node
2940 for (i=0; i<ips->num; i++) {
2941 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2942 if (tmp_ip == NULL) {
2943 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2947 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2951 if (tmp_ip->pnn != ips->ips[i].pnn) {
2952 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record a new owner for one public ip in ctdb->ip_tree.
 *
 * A DEBUG_ERR message is logged when ctdb->ip_tree is NULL, and another
 * when trbt_lookuparray32() finds no record for ip->addr.  Otherwise
 * the change is logged at DEBUG_NOTICE and the pnn stored in the tree
 * record is overwritten with ip->pnn.
 *
 * NOTE(review): braces and return statements are not visible in this
 * listing, so the return values and the end of the function cannot be
 * stated from here -- confirm against the full file.
 */
2960 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2962 struct ctdb_public_ip_list *tmp_ip;
2964 if (ctdb->ip_tree == NULL) {
2965 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
/* find the tree record for this address */
2969 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2970 if (tmp_ip == NULL) {
2971 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* log old and new owner, then store the new one */
2975 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2976 tmp_ip->pnn = ip->pnn;