4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout value handed to the release/takeover controls: built by
   timeval_current_ofs() from the takeover_timeout tunable (presumably
   "now + N seconds" - confirm against timeval_current_ofs).
   NOTE: the expansion references a variable named `ctdb`, so the macro
   can only be used where such a variable is in scope. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* First argument to timeval_current_ofs() when ctdb_control_send_arp
   re-arms its own timed event (presumably whole seconds - confirm). */
33 #define CTDB_ARP_INTERVAL 1
/* Value that arp->count is compared against in ctdb_control_send_arp
   (presumably the number of announcement rounds before it stops -
   confirm against the full function body). */
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we dont have an entry for this ip yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Verify that we dont have an entry for this ip yet */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
420 struct ctdb_do_updateip_state {
421 struct ctdb_req_control *c;
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 state = talloc(vnn, struct ctdb_do_updateip_state);
494 CTDB_NO_MEMORY(ctdb, state);
496 state->c = talloc_steal(ctdb, c);
500 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501 "interface %s to %s\n",
502 ctdb_addr_to_str(&vnn->public_address),
503 vnn->public_netmask_bits,
505 ctdb_vnn_iface_string(vnn)));
507 ret = ctdb_event_script_callback(ctdb,
509 ctdb_do_updateip_callback,
512 CTDB_EVENT_UPDATE_IP,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
519 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520 ctdb_addr_to_str(&vnn->public_address),
521 old->name, ctdb_vnn_iface_string(vnn)));
530 Find the vnn of the node that has a public ip address
531 returns a struct ctdb_vnn pointer, so presumably NULL (not -1) if the address is not known as a public address
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 struct ctdb_vnn *vnn;
537 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538 if (ctdb_same_ip(&vnn->public_address, addr)) {
547 take over an ip address
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550 struct ctdb_req_control *c,
555 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556 struct ctdb_vnn *vnn;
557 bool have_ip = false;
558 bool do_updateip = false;
559 bool do_takeip = false;
560 struct ctdb_iface *best_iface = NULL;
562 if (pip->pnn != ctdb->pnn) {
563 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564 "with pnn %d, but we're node %d\n",
565 ctdb_addr_to_str(&pip->addr),
566 pip->pnn, ctdb->pnn));
570 /* update out vnn list */
571 vnn = find_public_ip_vnn(ctdb, &pip->addr);
573 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574 ctdb_addr_to_str(&pip->addr)));
578 have_ip = ctdb_sys_have_ip(&pip->addr);
579 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580 if (best_iface == NULL) {
581 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582 "a usable interface (old %s, have_ip %d)\n",
583 ctdb_addr_to_str(&vnn->public_address),
584 vnn->public_netmask_bits,
585 ctdb_vnn_iface_string(vnn),
590 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
595 if (vnn->iface == NULL && have_ip) {
596 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597 "but we have no interface assigned, has someone manually configured it?"
599 ctdb_addr_to_str(&vnn->public_address)));
604 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
605 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606 "and we have it on iface[%s], but it was assigned to node %d"
607 "and we are node %d, banning ourself\n",
608 ctdb_addr_to_str(&vnn->public_address),
609 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
615 if (vnn->iface->link_up) {
616 /* only move when the rebalance gains something */
617 if (vnn->iface->references > (best_iface->references + 1)) {
620 } else if (vnn->iface != best_iface) {
627 ctdb_vnn_unassign_iface(ctdb, vnn);
634 ret = ctdb_do_takeip(ctdb, c, vnn);
638 } else if (do_updateip) {
639 ret = ctdb_do_updateip(ctdb, c, vnn);
645 * The interface is up and the kernel known the ip
648 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
649 ctdb_addr_to_str(&pip->addr),
650 vnn->public_netmask_bits,
651 ctdb_vnn_iface_string(vnn)));
655 /* tell ctdb_control.c that we will be replying asynchronously */
662 takeover an ip address old v4 style
664 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
665 struct ctdb_req_control *c,
671 data.dsize = sizeof(struct ctdb_public_ip);
672 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
673 CTDB_NO_MEMORY(ctdb, data.dptr);
675 memcpy(data.dptr, indata.dptr, indata.dsize);
676 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
680 kill any clients that are registered with an IP that is being released
682 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
684 struct ctdb_client_ip *ip;
686 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
687 ctdb_addr_to_str(addr)));
689 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
690 ctdb_sock_addr tmp_addr;
693 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
695 ctdb_addr_to_str(&ip->addr)));
697 if (ctdb_same_ip(&tmp_addr, addr)) {
698 struct ctdb_client *client = ctdb_reqid_find(ctdb,
701 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
703 ctdb_addr_to_str(&ip->addr),
706 if (client->pid != 0) {
707 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
708 (unsigned)client->pid,
709 ctdb_addr_to_str(addr),
711 kill(client->pid, SIGKILL);
718 called when releaseip event finishes
720 static void release_ip_callback(struct ctdb_context *ctdb, int status,
723 struct takeover_callback_state *state =
724 talloc_get_type(private_data, struct takeover_callback_state);
727 if (status == -ETIME) {
731 /* send a message to all clients of this node telling them
732 that the cluster has been reconfigured and they should
733 release any sockets on this IP */
734 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
735 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
736 data.dsize = strlen((char *)data.dptr)+1;
738 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
740 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
742 /* kill clients that have registered with this IP */
743 release_kill_clients(ctdb, state->addr);
745 ctdb_vnn_unassign_iface(ctdb, state->vnn);
747 /* the control succeeded */
748 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
753 release an ip address
755 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
756 struct ctdb_req_control *c,
761 struct takeover_callback_state *state;
762 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
763 struct ctdb_vnn *vnn;
765 /* update our vnn list */
766 vnn = find_public_ip_vnn(ctdb, &pip->addr);
768 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
769 ctdb_addr_to_str(&pip->addr)));
774 /* stop any previous arps */
775 talloc_free(vnn->takeover_ctx);
776 vnn->takeover_ctx = NULL;
778 if (!ctdb_sys_have_ip(&pip->addr)) {
779 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
780 ctdb_addr_to_str(&pip->addr),
781 vnn->public_netmask_bits,
782 ctdb_vnn_iface_string(vnn)));
783 ctdb_vnn_unassign_iface(ctdb, vnn);
787 if (vnn->iface == NULL) {
788 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
789 "but we have no interface assigned, has someone manually configured it?"
791 ctdb_addr_to_str(&vnn->public_address)));
796 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
797 ctdb_addr_to_str(&pip->addr),
798 vnn->public_netmask_bits,
799 ctdb_vnn_iface_string(vnn),
802 state = talloc(ctdb, struct takeover_callback_state);
803 CTDB_NO_MEMORY(ctdb, state);
805 state->c = talloc_steal(state, c);
806 state->addr = talloc(state, ctdb_sock_addr);
807 CTDB_NO_MEMORY(ctdb, state->addr);
808 *state->addr = pip->addr;
811 ret = ctdb_event_script_callback(ctdb,
812 state, release_ip_callback, state,
814 CTDB_EVENT_RELEASE_IP,
816 ctdb_vnn_iface_string(vnn),
817 ctdb_addr_to_str(&pip->addr),
818 vnn->public_netmask_bits);
820 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
821 ctdb_addr_to_str(&pip->addr),
822 ctdb_vnn_iface_string(vnn)));
827 /* tell the control that we will be reply asynchronously */
833 release an ip address old v4 style
835 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
836 struct ctdb_req_control *c,
842 data.dsize = sizeof(struct ctdb_public_ip);
843 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
844 CTDB_NO_MEMORY(ctdb, data.dptr);
846 memcpy(data.dptr, indata.dptr, indata.dsize);
847 return ctdb_control_release_ip(ctdb, c, data, async_reply);
851 static int ctdb_add_public_address(struct ctdb_context *ctdb,
852 ctdb_sock_addr *addr,
853 unsigned mask, const char *ifaces)
855 struct ctdb_vnn *vnn;
862 /* Verify that we dont have an entry for this ip yet */
863 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
864 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
865 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
866 ctdb_addr_to_str(addr)));
871 /* create a new vnn structure for this ip address */
872 vnn = talloc_zero(ctdb, struct ctdb_vnn);
873 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
874 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
875 tmp = talloc_strdup(vnn, ifaces);
876 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
877 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
878 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
879 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
880 vnn->ifaces[num] = talloc_strdup(vnn, iface);
881 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
885 vnn->ifaces[num] = NULL;
886 vnn->public_address = *addr;
887 vnn->public_netmask_bits = mask;
889 if (ctdb_sys_have_ip(addr)) {
890 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
891 vnn->pnn = ctdb->pnn;
894 for (i=0; vnn->ifaces[i]; i++) {
895 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
897 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
898 "for public_address[%s]\n",
899 vnn->ifaces[i], ctdb_addr_to_str(addr)));
904 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
908 DLIST_ADD(ctdb->vnn, vnn);
914 setup the event script directory
916 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
918 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
919 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
924 setup the public address lists from a file
926 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
932 lines = file_lines_load(alist, &nlines, ctdb);
934 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
937 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
941 for (i=0;i<nlines;i++) {
949 while ((*line == ' ') || (*line == '\t')) {
955 if (strcmp(line, "") == 0) {
958 tok = strtok(line, " \t");
960 tok = strtok(NULL, " \t");
962 if (NULL == ctdb->default_public_interface) {
963 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
968 ifaces = ctdb->default_public_interface;
973 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
974 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
978 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
979 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
989 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
993 struct ctdb_vnn *svnn;
997 svnn = talloc_zero(ctdb, struct ctdb_vnn);
998 CTDB_NO_MEMORY(ctdb, svnn);
1000 svnn->ifaces = talloc_array(svnn, const char *, 2);
1001 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1002 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1003 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1004 svnn->ifaces[1] = NULL;
1006 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1012 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1014 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1015 "for single_ip[%s]\n",
1017 ctdb_addr_to_str(&svnn->public_address)));
1022 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1028 ctdb->single_ip_vnn = svnn;
1032 struct ctdb_public_ip_list {
1033 struct ctdb_public_ip_list *next;
1035 ctdb_sock_addr addr;
1039 /* Given a physical node, return the number of
1040 public addresses that are currently assigned to this node.
1042 static int node_ip_coverage(struct ctdb_context *ctdb,
1044 struct ctdb_public_ip_list *ips)
1048 for (;ips;ips=ips->next) {
1049 if (ips->pnn == pnn) {
1057 /* Check if this is a public ip known to the node, i.e. can that
1058 node takeover this ip ?
1060 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1061 struct ctdb_public_ip_list *ip)
1063 struct ctdb_all_public_ips *public_ips;
1066 public_ips = ctdb->nodes[pnn]->available_public_ips;
1068 if (public_ips == NULL) {
1072 for (i=0;i<public_ips->num;i++) {
1073 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1074 /* yes, this node can serve this public ip */
1083 /* search the node list for a node to take over this ip.
1084 pick the node that is currently serving the fewest ips
1085 so that the ips get spread out evenly.
1087 static int find_takeover_node(struct ctdb_context *ctdb,
1088 struct ctdb_node_map *nodemap, uint32_t mask,
1089 struct ctdb_public_ip_list *ip,
1090 struct ctdb_public_ip_list *all_ips)
1092 int pnn, min=0, num;
1096 for (i=0;i<nodemap->num;i++) {
1097 if (nodemap->nodes[i].flags & mask) {
1098 /* This node is not healty and can not be used to serve
1104 /* verify that this node can serve this ip */
1105 if (can_node_serve_ip(ctdb, i, ip)) {
1106 /* no it couldnt so skip to the next node */
1110 num = node_ip_coverage(ctdb, i, all_ips);
1111 /* was this the first node we checked ? */
1123 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1124 ctdb_addr_to_str(&ip->addr)));
1134 static uint32_t *ip_key(ctdb_sock_addr *ip)
1136 static uint32_t key[IP_KEYLEN];
1138 bzero(key, sizeof(key));
1140 switch (ip->sa.sa_family) {
1142 key[3] = htonl(ip->ip.sin_addr.s_addr);
1145 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1146 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1147 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1148 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1151 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1158 static void *add_ip_callback(void *parm, void *data)
1160 struct ctdb_public_ip_list *this_ip = parm;
1161 struct ctdb_public_ip_list *prev_ip = data;
1163 if (prev_ip == NULL) {
1166 if (this_ip->pnn == -1) {
1167 this_ip->pnn = prev_ip->pnn;
1173 void getips_count_callback(void *param, void *data)
1175 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1176 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1178 new_ip->next = *ip_list;
1182 static struct ctdb_public_ip_list *
1183 create_merged_ip_list(struct ctdb_context *ctdb)
1186 struct ctdb_public_ip_list *ip_list;
1187 struct ctdb_all_public_ips *public_ips;
1189 if (ctdb->ip_tree != NULL) {
1190 talloc_free(ctdb->ip_tree);
1191 ctdb->ip_tree = NULL;
1193 ctdb->ip_tree = trbt_create(ctdb, 0);
1195 for (i=0;i<ctdb->num_nodes;i++) {
1196 public_ips = ctdb->nodes[i]->known_public_ips;
1198 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1202 /* there were no public ips for this node */
1203 if (public_ips == NULL) {
1207 for (j=0;j<public_ips->num;j++) {
1208 struct ctdb_public_ip_list *tmp_ip;
1210 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1211 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1212 tmp_ip->pnn = public_ips->ips[j].pnn;
1213 tmp_ip->addr = public_ips->ips[j].addr;
1214 tmp_ip->next = NULL;
1216 trbt_insertarray32_callback(ctdb->ip_tree,
1217 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1224 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1230 make any IP alias changes for public addresses that are necessary
1232 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1234 int i, num_healthy, retries;
1235 struct ctdb_public_ip ip;
1236 struct ctdb_public_ipv4 ipv4;
1237 uint32_t mask, *nodes;
1238 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1239 int maxnode, maxnum=0, minnode, minnum=0, num;
1241 struct timeval timeout;
1242 struct client_async_data *async_data;
1243 struct ctdb_client_control_state *state;
1244 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1247 * ip failover is completely disabled, just send out the
1248 * ipreallocated event.
1250 if (ctdb->tunable.disable_ip_failover != 0) {
1256 /* Count how many completely healthy nodes we have */
1258 for (i=0;i<nodemap->num;i++) {
1259 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1264 if (num_healthy > 0) {
1265 /* We have healthy nodes, so only consider them for
1266 serving public addresses
1268 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1270 /* We didnt have any completely healthy nodes so
1271 use "disabled" nodes as a fallback
1273 mask = NODE_FLAGS_INACTIVE;
1276 /* since nodes only know about those public addresses that
1277 can be served by that particular node, no single node has
1278 a full list of all public addresses that exist in the cluster.
1279 Walk over all node structures and create a merged list of
1280 all public addresses that exist in the cluster.
1282 keep the tree of ips around as ctdb->ip_tree
1284 all_ips = create_merged_ip_list(ctdb);
1286 /* If we want deterministic ip allocations, i.e. that the ip addresses
1287 will always be allocated the same way for a specific set of
1288 available/unavailable nodes.
1290 if (1 == ctdb->tunable.deterministic_public_ips) {
1291 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1292 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1293 tmp_ip->pnn = i%nodemap->num;
1298 /* mark all public addresses with a masked node as being served by
1301 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1302 if (tmp_ip->pnn == -1) {
1305 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1310 /* verify that the assigned nodes can serve that public ip
1311 and set it to -1 if not
1313 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1314 if (tmp_ip->pnn == -1) {
1317 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1318 /* this node can not serve this ip. */
1324 /* now we must redistribute all public addresses with takeover node
1325 -1 among the nodes available
1329 /* loop over all ip's and find a physical node to cover for
1332 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1333 if (tmp_ip->pnn == -1) {
1334 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1335 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1336 ctdb_addr_to_str(&tmp_ip->addr)));
1341 /* If we dont want ips to fail back after a node becomes healthy
1342 again, we wont even try to reallocat the ip addresses so that
1343 they are evenly spread out.
1344 This can NOT be used at the same time as DeterministicIPs !
1346 if (1 == ctdb->tunable.no_ip_failback) {
1347 if (1 == ctdb->tunable.deterministic_public_ips) {
1348 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1354 /* now, try to make sure the ip adresses are evenly distributed
1356 for each ip address, loop over all nodes that can serve this
1357 ip and make sure that the difference between the node
1358 serving the most and the node serving the least ip's are not greater
1361 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1362 if (tmp_ip->pnn == -1) {
1366 /* Get the highest and lowest number of ips's served by any
1367 valid node which can serve this ip.
1371 for (i=0;i<nodemap->num;i++) {
1372 if (nodemap->nodes[i].flags & mask) {
1376 /* only check nodes that can actually serve this ip */
1377 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1378 /* no it couldnt so skip to the next node */
1382 num = node_ip_coverage(ctdb, i, all_ips);
1383 if (maxnode == -1) {
1392 if (minnode == -1) {
1402 if (maxnode == -1) {
1403 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1404 ctdb_addr_to_str(&tmp_ip->addr)));
1409 /* If we want deterministic IPs then dont try to reallocate
1410 them to spread out the load.
1412 if (1 == ctdb->tunable.deterministic_public_ips) {
1416 /* if the spread between the smallest and largest coverage by
1417 a node is >=2 we steal one of the ips from the node with
1418 most coverage to even things out a bit.
1419 try to do this at most 5 times since we dont want to spend
1420 too much time balancing the ip coverage.
1422 if ( (maxnum > minnum+1)
1424 struct ctdb_public_ip_list *tmp;
1426 /* mark one of maxnode's vnn's as unassigned and try
1429 for (tmp=all_ips;tmp;tmp=tmp->next) {
1430 if (tmp->pnn == maxnode) {
1440 /* finished distributing the public addresses, now just send the
1441 info out to the nodes
1445 /* at this point ->pnn is the node which will own each IP
1446 or -1 if there is no node that can cover this ip
1449 /* now tell all nodes to delete any alias that they should not
1450 have. This will be a NOOP on nodes that don't currently
1451 hold the given alias */
1452 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1453 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1455 for (i=0;i<nodemap->num;i++) {
1456 /* don't talk to unconnected nodes, but do talk to banned nodes */
1457 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1461 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1462 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1463 /* This node should be serving this
1464 vnn so dont tell it to release the ip
1468 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1469 ipv4.pnn = tmp_ip->pnn;
1470 ipv4.sin = tmp_ip->addr.ip;
1472 timeout = TAKEOVER_TIMEOUT();
1473 data.dsize = sizeof(ipv4);
1474 data.dptr = (uint8_t *)&ipv4;
1475 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1476 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1480 ip.pnn = tmp_ip->pnn;
1481 ip.addr = tmp_ip->addr;
1483 timeout = TAKEOVER_TIMEOUT();
1484 data.dsize = sizeof(ip);
1485 data.dptr = (uint8_t *)&ip;
1486 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1487 0, CTDB_CONTROL_RELEASE_IP, 0,
1492 if (state == NULL) {
1493 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1494 talloc_free(tmp_ctx);
1498 ctdb_client_async_add(async_data, state);
1501 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1502 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1503 talloc_free(tmp_ctx);
1506 talloc_free(async_data);
1509 /* tell all nodes to get their own IPs */
1510 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1511 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1512 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1513 if (tmp_ip->pnn == -1) {
1514 /* this IP won't be taken over */
1518 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1519 ipv4.pnn = tmp_ip->pnn;
1520 ipv4.sin = tmp_ip->addr.ip;
1522 timeout = TAKEOVER_TIMEOUT();
1523 data.dsize = sizeof(ipv4);
1524 data.dptr = (uint8_t *)&ipv4;
1525 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1526 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1530 ip.pnn = tmp_ip->pnn;
1531 ip.addr = tmp_ip->addr;
1533 timeout = TAKEOVER_TIMEOUT();
1534 data.dsize = sizeof(ip);
1535 data.dptr = (uint8_t *)&ip;
1536 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1537 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1541 if (state == NULL) {
1542 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1543 talloc_free(tmp_ctx);
1547 ctdb_client_async_add(async_data, state);
1549 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1550 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1551 talloc_free(tmp_ctx);
1556 /* tell all nodes to update natwg */
1557 /* send the flags update natgw on all connected nodes */
1558 data.dptr = discard_const("ipreallocated");
1559 data.dsize = strlen((char *)data.dptr) + 1;
1560 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1561 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1562 nodes, 0, TAKEOVER_TIMEOUT(),
1566 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1569 talloc_free(tmp_ctx);
1575 destroy a ctdb_client_ip structure
// Destructor for a struct ctdb_client_ip record: logs the record's address
// and port at DEBUG_DEBUG level, then unlinks the record from the
// daemon-wide ctdb->client_ip_list.
// NOTE(review): the return value is not visible in this listing.
1577 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1579 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1580 ctdb_addr_to_str(&ip->addr),
1581 ntohs(ip->addr.ip.sin_port),
// Unlink this record from the list it was added to at registration time.
1584 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1589 called by a client to inform us of a TCP connection that it is managing
1590 that should be tickled with an ACK when IP takeover is done
1591 we handle both the old ipv4 style of packets as well as the new ipv4/6
// Control handler: a client registers a TCP connection that it manages.
// Two payload layouts are accepted, selected by indata.dsize:
//   - struct ctdb_control_tcp (older layout), which is copied into a zeroed
//     struct ctdb_control_tcp_addr;
//   - struct ctdb_control_tcp_addr, which is used in place.
// Both endpoints are canonicalized, the destination is looked up as a public
// address, the connection is recorded on the client, and a
// CTDB_CONTROL_TCP_ADD is broadcast to the connected nodes.
// NOTE(review): the return statements are not visible in this listing;
// confirm the exact return values against the full file.
1594 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
// Resolve the client structure from its request id.
// NOTE(review): no NULL check on client is visible before client->pid is read below.
1597 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1598 struct ctdb_control_tcp *old_addr = NULL;
1599 struct ctdb_control_tcp_addr new_addr;
1600 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1601 struct ctdb_tcp_list *tcp;
1602 struct ctdb_tcp_connection t;
1605 struct ctdb_client_ip *ip;
1606 struct ctdb_vnn *vnn;
1607 ctdb_sock_addr addr;
// Choose the payload layout from its size.
1609 switch (indata.dsize) {
1610 case sizeof(struct ctdb_control_tcp):
// Older layout: copy the two endpoints into the local new_addr.
1611 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1612 ZERO_STRUCT(new_addr);
1613 tcp_sock = &new_addr;
1614 tcp_sock->src.ip = old_addr->src;
1615 tcp_sock->dest.ip = old_addr->dest;
1617 case sizeof(struct ctdb_control_tcp_addr):
// Newer layout: tcp_sock points directly into the caller's buffer.
1618 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
// Any other size is rejected with a diagnostic.
1621 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1622 "to ctdb_control_tcp_client. size was %d but "
1623 "only allowed sizes are %lu and %lu\n",
1625 (long unsigned)sizeof(struct ctdb_control_tcp),
1626 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
// Canonicalize each endpoint: addr holds a copy of the endpoint and the
// result is written back into tcp_sock (presumably the helper takes
// (input, output) - confirm against its definition).
1630 addr = tcp_sock->src;
1631 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1632 addr = tcp_sock->dest;
1633 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
// Look up the public address that the connection's destination refers to.
1636 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1637 vnn = find_public_ip_vnn(ctdb, &addr);
// NOTE(review): the diagnostics below presumably run only when vnn is NULL;
// the guarding condition and the case labels are not visible here.
1639 switch (addr.sa.sa_family) {
// The IPv4 message is only logged when the destination is not loopback.
1641 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1642 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1643 ctdb_addr_to_str(&addr)));
1647 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1648 ctdb_addr_to_str(&addr)));
1651 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
// The public address exists but is assigned to a different node than this one.
1657 if (vnn->pnn != ctdb->pnn) {
1658 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1659 ctdb_addr_to_str(&addr),
1660 client_id, client->pid));
1661 /* failing this call will tell smbd to die */
// Allocate a client-ip record with the client as talloc parent, attach the
// destructor and link it into ctdb->client_ip_list.
1665 ip = talloc(client, struct ctdb_client_ip);
1666 CTDB_NO_MEMORY(ctdb, ip);
1670 ip->client_id = client_id;
1671 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1672 DLIST_ADD(ctdb->client_ip_list, ip);
// Record the connection on the client's own tcp_list.
1674 tcp = talloc(client, struct ctdb_tcp_list);
1675 CTDB_NO_MEMORY(ctdb, tcp);
1677 tcp->connection.src_addr = tcp_sock->src;
1678 tcp->connection.dst_addr = tcp_sock->dest;
1680 DLIST_ADD(client->tcp_list, tcp);
// Build the payload for the broadcast from the canonicalized endpoints.
1682 t.src_addr = tcp_sock->src;
1683 t.dst_addr = tcp_sock->dest;
1685 data.dptr = (uint8_t *)&t;
1686 data.dsize = sizeof(t);
// Log the registration; the port fields read depend on the address family.
// The message prints the destination port followed by the source address:port.
1688 switch (addr.sa.sa_family) {
1690 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1691 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1692 ctdb_addr_to_str(&tcp_sock->src),
1693 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1696 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1697 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1698 ctdb_addr_to_str(&tcp_sock->src),
1699 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1702 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1706 /* tell all nodes about this tcp connection */
// Sent with CTDB_CTRL_FLAG_NOREPLY and no callback.
1707 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1708 CTDB_CONTROL_TCP_ADD,
1709 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1711 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1719 find a tcp address on a list
// Linear search of a tickle array for a connection whose source and
// destination socket addresses both match those of tcp, as judged by
// ctdb_same_sockaddr(). On a match, returns a pointer to the matching
// element inside array->connections.
// NOTE(review): the results for array == NULL and for "not found" are not
// visible in this listing; presumably NULL, since a caller compares the
// result against NULL.
1721 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1722 struct ctdb_tcp_connection *tcp)
// An absent array is handled before any element is inspected.
1726 if (array == NULL) {
1730 for (i=0;i<array->num;i++) {
// Both endpoints must match for the entry to be considered the same connection.
1731 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1732 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1733 return &array->connections[i];
1742 called by a daemon to inform us of a TCP connection that one of its
1743 clients is managing and that should be tickled with an ACK when IP takeover is
// Control handler: record a TCP connection (a "tickle") against the public
// address that the connection's destination refers to.
//   indata            - dptr is interpreted as a struct ctdb_tcp_connection.
//   tcp_update_needed - when true, vnn->tcp_update_needed is set after the
//                       connection has been stored.
// NOTE(review): indata.dsize is not checked in the visible lines, and the
// return statements are not visible in this listing.
1746 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1748 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1749 struct ctdb_tcp_array *tcparray;
1750 struct ctdb_tcp_connection tcp;
1751 struct ctdb_vnn *vnn;
// Find the public address entry for the connection's destination.
1753 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
// NOTE(review): presumably logged only when vnn is NULL; the condition is
// not visible here.
1755 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1756 ctdb_addr_to_str(&p->dst_addr)));
1762 tcparray = vnn->tcp_array;
1764 /* If this is the first tickle */
1765 if (tcparray == NULL) {
// Allocate the array header under ctdb->nodes and publish it on the vnn.
1766 tcparray = talloc_size(ctdb->nodes,
1767 offsetof(struct ctdb_tcp_array, connections) +
1768 sizeof(struct ctdb_tcp_connection) * 1);
1769 CTDB_NO_MEMORY(ctdb, tcparray);
1770 vnn->tcp_array = tcparray;
// Allocate room for exactly one connection as a child of the array.
1773 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1774 CTDB_NO_MEMORY(ctdb, tcparray->connections);
// Store the new connection at index tcparray->num.
1776 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1777 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1780 if (tcp_update_needed) {
1781 vnn->tcp_update_needed = true;
1787 /* Do we already have this tickle ?*/
1788 tcp.src_addr = p->src_addr;
1789 tcp.dst_addr = p->dst_addr;
1790 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1791 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1792 ctdb_addr_to_str(&tcp.dst_addr),
1793 ntohs(tcp.dst_addr.ip.sin_port),
1798 /* A new tickle, we must add it to the array */
// NOTE(review): the realloc result is assigned straight back to
// tcparray->connections, so the previous pointer is overwritten before the
// CTDB_NO_MEMORY check runs - verify the failure path.
1799 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1800 struct ctdb_tcp_connection,
1802 CTDB_NO_MEMORY(ctdb, tcparray->connections);
// Append the connection at index tcparray->num.
1804 vnn->tcp_array = tcparray;
1805 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1806 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1809 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1810 ctdb_addr_to_str(&tcp.dst_addr),
1811 ntohs(tcp.dst_addr.ip.sin_port),
1814 if (tcp_update_needed) {
1815 vnn->tcp_update_needed = true;
1823 called by a daemon to inform us of a TCP connection that one of its
1824 clients is managing and that should be tickled with an ACK when IP takeover is
// Remove one TCP connection from the tickle array of the public address
// that conn->dst_addr refers to. Removal overwrites the matching slot with
// the last element and decrements the count; when the count reaches zero the
// whole array is freed. vnn->tcp_update_needed is set afterwards.
// NOTE(review): the early-exit statements for the "not found" cases are not
// visible in this listing.
1827 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1829 struct ctdb_tcp_connection *tcpp;
1830 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
// NOTE(review): presumably logged only when vnn is NULL; the condition is
// not visible here.
1833 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1834 ctdb_addr_to_str(&conn->dst_addr)));
1838 /* if the array is empty we cant remove it
1839 and we dont need to do anything
1841 if (vnn->tcp_array == NULL) {
1842 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1843 ctdb_addr_to_str(&conn->dst_addr),
1844 ntohs(conn->dst_addr.ip.sin_port)));
1849 /* See if we know this connection
1850 if we dont know this connection then we dont need to do anything
1852 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1854 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1855 ctdb_addr_to_str(&conn->dst_addr),
1856 ntohs(conn->dst_addr.ip.sin_port)));
1861 /* We need to remove this entry from the array.
1862 Instead of allocating a new array and copying data to it
1863 we cheat and just copy the last entry in the existing array
1864 to the entry that is to be removed and just shring the
// Swap-with-last removal: element order in the array is not preserved.
1867 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1868 vnn->tcp_array->num--;
1870 /* If we deleted the last entry we also need to remove the entire array
1872 if (vnn->tcp_array->num == 0) {
1873 talloc_free(vnn->tcp_array);
1874 vnn->tcp_array = NULL;
1877 vnn->tcp_update_needed = true;
// This message prints the source endpoint, whereas the earlier messages in
// this function print the destination endpoint.
1879 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1880 ctdb_addr_to_str(&conn->src_addr),
1881 ntohs(conn->src_addr.ip.sin_port)));
1886 called by a daemon to inform us of a TCP connection that one of its
1887 clients used are no longer needed in the tickle database
// Control handler: interprets indata.dptr as a struct ctdb_tcp_connection
// and removes that connection via ctdb_remove_tcp_connection().
// NOTE(review): indata.dsize is not checked in the visible lines, and the
// return value is not visible in this listing.
1889 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1891 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1893 ctdb_remove_tcp_connection(ctdb, conn);
1900 called when a daemon restarts - send all tickles for all public addresses
1901 we are serving immediately to the new node.
// Startup control handler. The only body content visible in this listing is
// the XXX note below: sending the tickles to the new node is marked as
// still to be done.
// NOTE(review): the return value is not visible here.
1903 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1905 /*XXX here we should send all tickes we are serving to the new node */
1911 called when a client structure goes away - hook to remove
1912 elements from the tcp_list in all daemons
// Drains client->tcp_list: each iteration takes the list head, unlinks it
// from the list, and removes its connection from the tickle data via
// ctdb_remove_tcp_connection(). The loop ends when the list is empty.
// NOTE(review): no explicit free of the list entries is visible in this
// listing.
1914 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1916 while (client->tcp_list) {
1917 struct ctdb_tcp_list *tcp = client->tcp_list;
// Unlink first so the next iteration sees a new head.
1918 DLIST_REMOVE(client->tcp_list, tcp);
1919 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1925 release all IPs on shutdown
// Walks every public address (ctdb->vnn list). For an address that
// ctdb_sys_have_ip() reports as not present, only the interface assignment
// is dropped. Otherwise the CTDB_EVENT_RELEASE_IP event script is run with
// "<iface> <address> <netmask bits>", clients of that address are killed
// via release_kill_clients(), and the interface assignment is dropped.
// NOTE(review): the statement that skips to the next vnn after the first
// branch is not visible in this listing.
1927 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1929 struct ctdb_vnn *vnn;
1931 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Address not currently held: just clear the iface assignment.
1932 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1933 ctdb_vnn_unassign_iface(ctdb, vnn);
// Address is held: run the release event, then clean up.
1939 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1940 ctdb_vnn_iface_string(vnn),
1941 ctdb_addr_to_str(&vnn->public_address),
1942 vnn->public_netmask_bits);
1943 release_kill_clients(ctdb, &vnn->public_address);
1944 ctdb_vnn_unassign_iface(ctdb, vnn);
1950 get list of public IPs
// Control handler: returns the list of public addresses in *outdata as a
// struct ctdb_all_public_ips. When the request carries the
// CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE flag, addresses for which
// ctdb_vnn_available() is false are left out.
// The buffer is allocated with outdata as talloc parent and sized for every
// vnn; outdata->dsize is then recomputed from the number of entries filled.
// NOTE(review): the counter updates and the return value are not visible in
// this listing.
1952 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1953 struct ctdb_req_control *c, TDB_DATA *outdata)
1956 struct ctdb_all_public_ips *ips;
1957 struct ctdb_vnn *vnn;
1958 bool only_available = false;
1960 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1961 only_available = true;
1964 /* count how many public ip structures we have */
1966 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Worst-case size: header plus one entry per counted vnn, zero-filled.
1970 len = offsetof(struct ctdb_all_public_ips, ips) +
1971 num*sizeof(struct ctdb_public_ip);
1972 ips = talloc_zero_size(outdata, len);
1973 CTDB_NO_MEMORY(ctdb, ips);
1976 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
// Apply the availability filter only when it was requested.
1977 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1980 ips->ips[i].pnn = vnn->pnn;
1981 ips->ips[i].addr = vnn->public_address;
// Report only the bytes covering the i entries that were written.
1985 len = offsetof(struct ctdb_all_public_ips, ips) +
1986 i*sizeof(struct ctdb_public_ip);
1988 outdata->dsize = len;
1989 outdata->dptr = (uint8_t *)ips;
1996 get list of public IPs, old ipv4 style. only returns ipv4 addresses
// Control handler: returns public addresses in the older IPv4-only layout
// (struct ctdb_all_public_ipsv4) in *outdata. Both loops test the address
// family against AF_INET, and each entry stores vnn->pnn together with the
// IPv4 form of the public address.
// NOTE(review): the counter updates, the skip statements and the return
// value are not visible in this listing.
1998 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
1999 struct ctdb_req_control *c, TDB_DATA *outdata)
2002 struct ctdb_all_public_ipsv4 *ips;
2003 struct ctdb_vnn *vnn;
2005 /* count how many public ip structures we have */
2007 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2008 if (vnn->public_address.sa.sa_family != AF_INET) {
// Allocate header plus num entries, zero-filled, with outdata as parent.
2014 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2015 num*sizeof(struct ctdb_public_ipv4);
2016 ips = talloc_zero_size(outdata, len);
2017 CTDB_NO_MEMORY(ctdb, ips);
// The output descriptor is set before the entries are filled in.
2019 outdata->dsize = len;
2020 outdata->dptr = (uint8_t *)ips;
2024 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2025 if (vnn->public_address.sa.sa_family != AF_INET) {
2028 ips->ips[i].pnn = vnn->pnn;
2029 ips->ips[i].sin = vnn->public_address.ip;
// Control handler: returns details for one public address in *outdata as a
// struct ctdb_control_public_ip_info - the address, its pnn, the index of
// the currently active interface (0xFFFFFFFF when none matches), and one
// ctdb_control_iface_info per interface name listed in vnn->ifaces.
// indata.dptr is interpreted as a ctdb_sock_addr. If the address is not a
// known public address, the daemon's single_ip_vnn is also tried.
// NOTE(review): indata.dsize is not checked in the visible lines; the NULL
// checks and return statements are not visible in this listing.
2036 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2037 struct ctdb_req_control *c,
2042 ctdb_sock_addr *addr;
2043 struct ctdb_control_public_ip_info *info;
2044 struct ctdb_vnn *vnn;
2046 addr = (ctdb_sock_addr *)indata.dptr;
2048 vnn = find_public_ip_vnn(ctdb, addr);
2050 /* if it is not a public ip it could be our 'single ip' */
2051 if (ctdb->single_ip_vnn) {
2052 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2053 vnn = ctdb->single_ip_vnn;
2058 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2059 "'%s'not a public address\n",
2060 ctdb_addr_to_str(addr)));
2064 /* count how many public ip structures we have */
// vnn->ifaces is walked until a NULL entry, i.e. it is NULL-terminated.
2066 for (;vnn->ifaces[num];) {
// Allocate header plus num interface entries, zero-filled.
2070 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2071 num*sizeof(struct ctdb_control_iface_info);
2072 info = talloc_zero_size(outdata, len);
2073 CTDB_NO_MEMORY(ctdb, info);
2075 info->ip.addr = vnn->public_address;
2076 info->ip.pnn = vnn->pnn;
// Sentinel meaning "no active interface found yet".
2077 info->active_idx = 0xFFFFFFFF;
2079 for (i=0; vnn->ifaces[i]; i++) {
2080 struct ctdb_iface *cur;
// Resolve the interface name to its ctdb_iface record.
2082 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2084 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
// Remember which entry is the interface the address is assigned to.
2088 if (vnn->iface == cur) {
2089 info->active_idx = i;
// NOTE(review): unbounded strcpy into a fixed-size name field - confirm
// that cur->name is always short enough.
2091 strcpy(info->ifaces[i].name, cur->name);
2092 info->ifaces[i].link_state = cur->link_up;
2093 info->ifaces[i].references = cur->references;
// Final size is computed from the number of entries written.
2096 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2097 i*sizeof(struct ctdb_control_iface_info);
2099 outdata->dsize = len;
2100 outdata->dptr = (uint8_t *)info;
// Control handler: returns every interface known to the daemon
// (ctdb->ifaces list) in *outdata as a struct ctdb_control_get_ifaces, with
// name, link state and reference count per interface.
// NOTE(review): the counter updates and the return value are not visible in
// this listing.
2105 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2106 struct ctdb_req_control *c,
2110 struct ctdb_control_get_ifaces *ifaces;
2111 struct ctdb_iface *cur;
2113 /* count how many public ip structures we have */
// Despite the comment above, this loop walks the interface list.
2115 for (cur=ctdb->ifaces;cur;cur=cur->next) {
// Allocate header plus num interface entries, zero-filled.
2119 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2120 num*sizeof(struct ctdb_control_iface_info);
2121 ifaces = talloc_zero_size(outdata, len);
2122 CTDB_NO_MEMORY(ctdb, ifaces);
2125 for (cur=ctdb->ifaces;cur;cur=cur->next) {
// NOTE(review): unbounded strcpy into a fixed-size name field - confirm
// that cur->name is always short enough.
2126 strcpy(ifaces->ifaces[i].name, cur->name);
2127 ifaces->ifaces[i].link_state = cur->link_up;
2128 ifaces->ifaces[i].references = cur->references;
// Final size is computed from the number of entries written.
2132 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2133 i*sizeof(struct ctdb_control_iface_info);
2135 outdata->dsize = len;
2136 outdata->dptr = (uint8_t *)ifaces;
// Control handler: sets the link state of a named interface.
// indata.dptr is interpreted as a struct ctdb_control_iface_info. The
// request is validated (name terminated, link_state recognised, references
// equal to 0), the interface is looked up by name, and its link_up flag is
// updated when it differs from the requested value.
// NOTE(review): indata.dsize is not checked in the visible lines; the
// link_state case labels and the return statements are not visible here.
2141 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2142 struct ctdb_req_control *c,
2145 struct ctdb_control_iface_info *info;
2146 struct ctdb_iface *iface;
2147 bool link_up = false;
2149 info = (struct ctdb_control_iface_info *)indata.dptr;
// The byte at index CTDB_IFACE_SIZE must be the terminator.
2151 if (info->name[CTDB_IFACE_SIZE] != '\0') {
// Print at most CTDB_IFACE_SIZE characters of the unterminated name.
2152 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2153 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2154 len, len, info->name));
2158 switch (info->link_state) {
2166 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2167 (unsigned int)info->link_state));
// Callers must not supply a reference count.
2171 if (info->references != 0) {
2172 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2173 (unsigned int)info->references));
2177 iface = ctdb_find_iface(ctdb, info->name);
2178 if (iface == NULL) {
2179 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
// Nothing to change when the state already matches.
2184 if (link_up == iface->link_up) {
// The log level is chosen from the OLD state: DEBUG_ERR when the link was
// up, DEBUG_NOTICE otherwise.
2188 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2189 ("iface[%s] has changed it's link status %s => %s\n",
2191 iface->link_up?"up":"down",
2192 link_up?"up":"down"));
2194 iface->link_up = link_up;
2200 structure containing the listening socket and the list of tcp connections
2201 that the ctdb daemon is to kill
// Per-public-address state for killing TCP connections.
// NOTE(review): other members (e.g. capture_fd and private_data, which are
// used elsewhere in this file) are not visible in this listing.
2203 struct ctdb_kill_tcp {
// Public address entry this state belongs to; its killtcp pointer is
// cleared by the destructor.
2204 struct ctdb_vnn *vnn;
2205 struct ctdb_context *ctdb;
// fd event registered for the capture socket.
2207 struct fd_event *fde;
// Tree of struct ctdb_killtcp_con, keyed by killtcp_key().
2208 trbt_tree_t *connections;
2213 a tcp connection that is to be killed
// One TCP connection queued for termination.
// NOTE(review): a count member is used elsewhere in this file but is not
// visible in this listing.
2215 struct ctdb_killtcp_con {
2216 ctdb_sock_addr src_addr;
2217 ctdb_sock_addr dst_addr;
// Back-pointer to the owning kill-tcp state.
2219 struct ctdb_kill_tcp *killtcp;
2222 /* this function is used to create a key to represent this socketpair
2223 in the killtcp tree.
2224 this key is used to insert and lookup matching socketpairs that are
2225 to be tickled and RST
// Number of 32-bit words in a killtcp tree key.
2227 #define KILLTCP_KEYLEN 10
// Builds the tree key for a (src, dst) socket pair. The key lives in a
// function-static buffer that is zeroed on every call, so a key from an
// earlier call is overwritten by the next call.
// Both addresses must have the same family; a mismatch or an unknown family
// is logged as an error.
// NOTE(review): the case labels and return statements are not visible in
// this listing.
2228 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2230 static uint32_t key[KILLTCP_KEYLEN];
2232 bzero(key, sizeof(key));
2234 if (src->sa.sa_family != dst->sa.sa_family) {
2235 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2239 switch (src->sa.sa_family) {
// IPv4 fields: words 0-1 hold the addresses and 2-3 the ports; the
// remaining words stay zero from the bzero above.
2241 key[0] = dst->ip.sin_addr.s_addr;
2242 key[1] = src->ip.sin_addr.s_addr;
2243 key[2] = dst->ip.sin_port;
2244 key[3] = src->ip.sin_port;
// IPv6 fields: address words are interleaved dst/src, taken from
// s6_addr32[3] down to s6_addr32[0], followed by the two ports.
2247 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2248 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2249 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2250 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2251 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2252 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2253 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2254 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2255 key[8] = dst->ip6.sin6_port;
2256 key[9] = src->ip6.sin6_port;
2259 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2267 called when we get a read event on the raw socket
// fd event callback for the capture socket. Reads one TCP packet, looks up
// its socket pair in killtcp->connections, and if the pair is one we are
// trying to kill, sends a TCP packet back using the captured ack/seq numbers
// (logged as "sending a tcp reset").
// private_data is the struct ctdb_kill_tcp.
// NOTE(review): the early returns and the removal of the connection from
// the tree are not visible in this listing.
2269 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2270 uint16_t flags, void *private_data)
2272 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2273 struct ctdb_killtcp_con *con;
2274 ctdb_sock_addr src, dst;
2275 uint32_t ack_seq, seq;
// Only read events are of interest.
2277 if (!(flags & EVENT_FD_READ)) {
// Parse the next captured packet into src/dst and sequence numbers.
2281 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2282 killtcp->private_data,
2284 &ack_seq, &seq) != 0) {
2285 /* probably a non-tcp ACK packet */
2289 /* check if we have this guy in our list of connections
2292 con = trbt_lookuparray32(killtcp->connections,
2293 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2295 /* no this was some other packet we can just ignore */
2299 /* This one has been tickled !
2300 now reset him and remove him from the list.
2302 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2303 ntohs(con->dst_addr.ip.sin_port),
2304 ctdb_addr_to_str(&con->src_addr),
2305 ntohs(con->src_addr.ip.sin_port)));
// Sent from the stored dst_addr to the stored src_addr; the final argument
// is 1 (presumably the RST flag - confirm against ctdb_sys_send_tcp).
2307 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2312 /* when traversing the list of all tcp connections to send tickle acks to
2313 (so that we can capture the ack coming back and kill the connection
2315 this callback is called for each connection we are currently trying to kill
// Tree-traversal callback, invoked once per connection being killed.
//   param - talloc context that collects connections to be deleted.
//   data  - the struct ctdb_killtcp_con for this tree node.
// A connection whose count has reached 5 is re-parented onto param so the
// caller can free it after the traversal. Other connections are tickled
// again.
// NOTE(review): the increment of con->count and the name of the send call
// are not visible in this listing.
2317 static void tickle_connection_traverse(void *param, void *data)
2319 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2321 /* have tried too many times, just give up */
2322 if (con->count >= 5) {
2323 /* can't delete in traverse: reparent to delete_cons */
2324 talloc_steal(param, con);
2328 /* othervise, try tickling it again */
2331 (ctdb_sock_addr *)&con->dst_addr,
2332 (ctdb_sock_addr *)&con->src_addr,
2338 called every second until all sentenced connections have been reset
// Timed event callback: tickles every connection still queued for killing,
// frees those that the traversal gave up on, and either frees the whole
// killtcp structure (when no connections remain) or re-arms itself to run
// again one second later.
// private_data is the struct ctdb_kill_tcp.
// NOTE(review): the return after talloc_free(killtcp) is not visible in
// this listing.
2340 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2341 struct timeval t, void *private_data)
2343 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
// Temporary context that collects connections to delete.
2344 void *delete_cons = talloc_new(NULL);
2346 /* loop over all connections sending tickle ACKs */
2347 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2349 /* now we've finished traverse, it's safe to do deletion. */
2350 talloc_free(delete_cons);
2352 /* If there are no more connections to kill we can remove the
2353 entire killtcp structure
2355 if ( (killtcp->connections == NULL) ||
2356 (killtcp->connections->root == NULL) ) {
2357 talloc_free(killtcp);
2361 /* try tickling them again in a seconds time
// The timer is allocated with killtcp as its talloc context.
2363 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2364 ctdb_tickle_sentenced_connections, killtcp);
2368 destroy the killtcp structure
// Destructor for a struct ctdb_kill_tcp: clears the owning vnn's killtcp
// pointer.
// NOTE(review): any other statements and the return value are not visible
// in this listing.
2370 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2373 killtcp->vnn->killtcp = NULL;
2379 /* nothing fancy here, just unconditionally replace any existing
2380 connection structure with the new one.
2382 dont even free the old one if it did exist, that one is talloc_stolen
2383 by the same node in the tree anyway and will be deleted when the new data
// Callback passed to trbt_insertarray32_callback() when a connection is
// added to the killtcp tree.
// NOTE(review): the body is not visible in this listing; per the comment
// preceding this function it replaces any existing entry with the new one.
2386 static void *add_killtcp_callback(void *parm, void *data)
2392 add a tcp socket to the list of connections we want to RST
// Queue a TCP connection for termination.
// Both endpoints are canonicalized and a matching public address (or the
// daemon's single_ip_vnn) is located. On first use for that address a
// struct ctdb_kill_tcp is created. The connection is inserted into its
// tree, a capture socket and its fd event are set up if missing, and a
// one-second timer is armed to keep tickling.
// NOTE(review): the parameter list continuation, several conditions, the
// initial tickle call and the return statements are not visible in this
// listing.
2394 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2398 ctdb_sock_addr src, dst;
2399 struct ctdb_kill_tcp *killtcp;
2400 struct ctdb_killtcp_con *con;
2401 struct ctdb_vnn *vnn;
2403 ctdb_canonicalize_ip(s, &src);
2404 ctdb_canonicalize_ip(d, &dst);
// Try the destination first, then the source (presumably only when the
// first lookup fails - the condition is not visible here).
2406 vnn = find_public_ip_vnn(ctdb, &dst);
2408 vnn = find_public_ip_vnn(ctdb, &src);
2411 /* if it is not a public ip it could be our 'single ip' */
2412 if (ctdb->single_ip_vnn) {
2413 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2414 vnn = ctdb->single_ip_vnn;
2419 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2423 killtcp = vnn->killtcp;
2425 /* If this is the first connection to kill we must allocate
2428 if (killtcp == NULL) {
2429 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2430 CTDB_NO_MEMORY(ctdb, killtcp);
2433 killtcp->ctdb = ctdb;
// -1 marks "no capture socket opened yet".
2434 killtcp->capture_fd = -1;
2435 killtcp->connections = trbt_create(killtcp, 0);
2437 vnn->killtcp = killtcp;
2438 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2443 /* create a structure that describes this connection we want to
2444 RST and store it in killtcp->connections
2446 con = talloc(killtcp, struct ctdb_killtcp_con);
2447 CTDB_NO_MEMORY(ctdb, con);
2448 con->src_addr = src;
2449 con->dst_addr = dst;
2451 con->killtcp = killtcp;
// The key is built with the connection's dst_addr passed as the key's src
// argument and src_addr as its dst argument.
2454 trbt_insertarray32_callback(killtcp->connections,
2455 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2456 add_killtcp_callback, con);
2459 If we dont have a socket to listen on yet we must create it
2461 if (killtcp->capture_fd == -1) {
2462 const char *iface = ctdb_vnn_iface_string(vnn);
2463 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2464 if (killtcp->capture_fd == -1) {
2465 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2466 "socket on iface '%s' for killtcp (%s)\n",
2467 iface, strerror(errno)));
// Register the read handler once; auto-close is enabled on the fd event.
2473 if (killtcp->fde == NULL) {
2474 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2476 capture_tcp_handler, killtcp);
2477 tevent_fd_set_auto_close(killtcp->fde);
2479 /* We also need to set up some events to tickle all these connections
2480 until they are all reset
2482 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2483 ctdb_tickle_sentenced_connections, killtcp);
2486 /* tickle him once now */
// Presumably the failure path (its label is not visible): drop the whole
// killtcp state for this vnn.
2495 talloc_free(vnn->killtcp);
2496 vnn->killtcp = NULL;
2501 kill a TCP connection.
// Control handler: interprets indata.dptr as a struct ctdb_control_killtcp
// and queues that connection for termination. Returns the result of
// ctdb_killtcp_add_connection().
// NOTE(review): indata.dsize is not checked in the visible lines.
2503 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2505 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2507 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2511 called by a daemon to inform us of the entire list of TCP tickles for
2512 a particular public address.
2513 this control should only be sent by the node that is currently serving
2514 that public address.
// Control handler: replaces the whole tickle list of one public address
// with the list carried in indata (struct ctdb_control_tcp_tickle_list).
// The payload size is validated in two steps, the address is looked up, any
// previous array is freed, and a fresh array is built and attached to the
// vnn.
// NOTE(review): the return statements are not visible in this listing.
2516 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2518 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2519 struct ctdb_tcp_array *tcparray;
2520 struct ctdb_vnn *vnn;
2522 /* We must at least have tickles.num or else we cant verify the size
2523 of the received data blob
2525 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2526 tickles.connections)) {
2527 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2531 /* verify that the size of data matches what we expect */
// Only "too small" is rejected; a payload larger than needed passes this check.
2532 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2533 tickles.connections)
2534 + sizeof(struct ctdb_tcp_connection)
2535 * list->tickles.num) {
2536 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2540 vnn = find_public_ip_vnn(ctdb, &list->addr);
2542 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2543 ctdb_addr_to_str(&list->addr)));
2548 /* remove any old ticklelist we might have */
2549 talloc_free(vnn->tcp_array);
2550 vnn->tcp_array = NULL;
// Build the new array under ctdb->nodes first; it is re-parented below.
2552 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2553 CTDB_NO_MEMORY(ctdb, tcparray);
2555 tcparray->num = list->tickles.num;
2557 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2558 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2560 memcpy(tcparray->connections, &list->tickles.connections[0],
2561 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2563 /* We now have a new fresh tickle list array for this vnn */
// talloc_steal re-parents the array onto the vnn.
2564 vnn->tcp_array = talloc_steal(vnn, tcparray);
2570 called to return the full list of tickles for the public address associated
2571 with the provided vnn
// Control handler: returns the tickle list of one public address in
// *outdata as a struct ctdb_control_tcp_tickle_list.
// indata.dptr is interpreted as the ctdb_sock_addr of the public address.
// The output buffer is allocated with outdata as talloc parent and sized
// for num connections.
// NOTE(review): indata.dsize is not checked in the visible lines; the
// handling of a NULL tcp_array, the copy of the address into the reply and
// the return statements are not visible in this listing.
2573 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2575 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2576 struct ctdb_control_tcp_tickle_list *list;
2577 struct ctdb_tcp_array *tcparray;
2579 struct ctdb_vnn *vnn;
2581 vnn = find_public_ip_vnn(ctdb, addr);
2583 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2584 ctdb_addr_to_str(addr)));
2589 tcparray = vnn->tcp_array;
// NOTE(review): presumably guarded by a tcparray != NULL test that is not
// visible here.
2591 num = tcparray->num;
// Reply size: fixed header plus num connection entries.
2596 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2597 tickles.connections)
2598 + sizeof(struct ctdb_tcp_connection) * num;
2600 outdata->dptr = talloc_size(outdata, outdata->dsize);
2601 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2602 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2605 list->tickles.num = num;
2607 memcpy(&list->tickles.connections[0], tcparray->connections,
2608 sizeof(struct ctdb_tcp_connection) * num);
2616 set the list of all tcp tickles for a public address
// Builds a struct ctdb_control_tcp_tickle_list from tcparray and sends it
// as a CTDB_CONTROL_SET_TCP_TICKLE_LIST control with CTDB_CTRL_FLAG_NOREPLY.
// The temporary payload buffer is allocated under ctdb and freed after the
// send.
// NOTE(review): in the visible lines the control is addressed to
// CTDB_BROADCAST_CONNECTED, and the timeout and destnode parameters do not
// appear to be used - confirm against the full file. The handling of a NULL
// tcparray, the copy of addr into the payload and the return statements are
// not visible in this listing.
2618 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2619 struct timeval timeout, uint32_t destnode,
2620 ctdb_sock_addr *addr,
2621 struct ctdb_tcp_array *tcparray)
2625 struct ctdb_control_tcp_tickle_list *list;
2628 num = tcparray->num;
// Payload size: fixed header plus num connection entries.
2633 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2634 tickles.connections) +
2635 sizeof(struct ctdb_tcp_connection) * num;
2636 data.dptr = talloc_size(ctdb, data.dsize);
2637 CTDB_NO_MEMORY(ctdb, data.dptr);
2639 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2641 list->tickles.num = num;
2643 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2646 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2647 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2648 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2650 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
// Release the temporary payload buffer.
2654 talloc_free(data.dptr);
2661 perform tickle updates if required
// Timed event callback: for each public address that is assigned to this
// node and has tcp_update_needed set, sends its tickle list via
// ctdb_ctrl_set_tcp_tickles(). It then re-arms itself to run again after
// tunable.tickle_update_interval seconds.
// private_data is the struct ctdb_context.
// NOTE(review): the skip statements, the remaining call arguments and any
// reset of tcp_update_needed are not visible in this listing.
2663 static void ctdb_update_tcp_tickles(struct event_context *ev,
2664 struct timed_event *te,
2665 struct timeval t, void *private_data)
2667 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2669 struct ctdb_vnn *vnn;
2671 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2672 /* we only send out updates for public addresses that
2675 if (ctdb->pnn != vnn->pnn) {
2678 /* We only send out the updates if we need to */
2679 if (!vnn->tcp_update_needed) {
2682 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2684 CTDB_BROADCAST_CONNECTED,
2685 &vnn->public_address,
2688 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2689 ctdb_addr_to_str(&vnn->public_address)));
// Schedule the next periodic run under the tickle update context.
2693 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2694 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2695 ctdb_update_tcp_tickles, ctdb);
2700 start periodic update of tcp tickles
// Starts the periodic tickle update: creates ctdb->tickle_update_context as
// a talloc child of ctdb and schedules the first ctdb_update_tcp_tickles()
// run after tunable.tickle_update_interval seconds.
2702 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
// Timers for the periodic update are allocated under this context.
2704 ctdb->tickle_update_context = talloc_new(ctdb);
2706 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2707 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2708 ctdb_update_tcp_tickles, ctdb);
// State for a series of gratuitous ARP transmissions.
// NOTE(review): the iface and count members used elsewhere in this file are
// not visible in this listing.
2714 struct control_gratious_arp {
2715 struct ctdb_context *ctdb;
// Address announced by each ARP.
2716 ctdb_sock_addr addr;
2722 send a control_gratuitous arp
// Timed event callback: sends one gratuitous ARP for arp->addr on
// arp->iface and, until arp->count equals CTDB_ARP_REPEAT, re-arms itself
// to fire again after CTDB_ARP_INTERVAL seconds.
// private_data is the struct control_gratious_arp.
// NOTE(review): the increment of arp->count and the cleanup performed when
// the repeat limit is reached are not visible in this listing.
2724 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2725 struct timeval t, void *private_data)
2728 struct control_gratious_arp *arp = talloc_get_type(private_data,
2729 struct control_gratious_arp);
2731 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2733 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2734 arp->iface, strerror(errno)));
// Stop once the configured number of repeats has been reached.
2739 if (arp->count == CTDB_ARP_REPEAT) {
// The next timer is allocated with arp as its talloc context.
2744 event_add_timed(arp->ctdb->ev, arp,
2745 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2746 send_gratious_arp, arp);
// Control handler: starts a series of gratuitous ARPs for the address and
// interface given in indata (struct ctdb_control_gratious_arp).
// The payload size is validated, a struct control_gratious_arp is allocated
// under ctdb with a copy of the interface name, and send_gratious_arp() is
// scheduled to run immediately (timeval_zero()).
// NOTE(review): the first half of the second size test, the initialisation
// of arp->ctdb and arp->count, and the return statements are not visible in
// this listing. talloc_strdup() relies on the payload's iface being
// NUL-terminated - confirm.
2753 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2755 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2756 struct control_gratious_arp *arp;
2758 /* verify the size of indata */
// First make sure the fixed header (up to iface) is present.
2759 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2760 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2761 (unsigned)indata.dsize,
2762 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
// Then compare against header size plus the declared iface length.
2766 ( offsetof(struct ctdb_control_gratious_arp, iface)
2767 + gratious_arp->len ) ){
2769 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2770 "but should be %u bytes\n",
2771 (unsigned)indata.dsize,
2772 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2777 arp = talloc(ctdb, struct control_gratious_arp);
2778 CTDB_NO_MEMORY(ctdb, arp);
2781 arp->addr = gratious_arp->addr;
2782 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2783 CTDB_NO_MEMORY(ctdb, arp->iface);
// Fire the first ARP as soon as the event loop runs.
2786 event_add_timed(arp->ctdb->ev, arp,
2787 timeval_zero(), send_gratious_arp, arp);
// Control handler: adds a public address described by indata
// (struct ctdb_control_ip_iface: address, mask and interface name) by
// calling ctdb_add_public_address().
// NOTE(review): most of the second size test and the return statements are
// not visible in this listing.
2792 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2794 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2797 /* verify the size of indata */
// First make sure the fixed header (up to iface) is present.
2798 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2799 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
// Then compare against header size plus the declared iface length.
2803 ( offsetof(struct ctdb_control_ip_iface, iface)
2806 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2807 "but should be %u bytes\n",
2808 (unsigned)indata.dsize,
2809 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2813 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2816 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2824 called when releaseip event finishes for del_public_address
/*
 * Callback handed to ctdb_event_script_callback() by
 * ctdb_control_del_public_address().  The only visible action is to
 * talloc_free() private_data; the caller passes its temporary mem_ctx in
 * that slot, so this releases that context.  Neither ctdb nor status is
 * used in the visible lines.
 */
2826 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2829 talloc_free(private_data);
/*
 * Control handler that removes a public address described by indata.
 *
 * indata.dptr is interpreted as a struct ctdb_control_ip_iface.  After the
 * size checks, ctdb->vnn is searched for an entry whose public_address
 * matches pub->addr (ctdb_same_ip).  The matching entry is removed from the
 * list; if it has an interface, a CTDB_EVENT_RELEASE_IP event script is
 * started with delete_ip_callback as completion callback and the interface
 * is then unassigned via ctdb_vnn_unassign_iface().
 *
 * The return statements, and whatever happens to vnn after it is unlinked,
 * are not visible in these lines - TODO confirm against the full function.
 */
2832 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2834 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2835 struct ctdb_vnn *vnn;
2838 /* verify the size of indata */
/* first check: indata must at least cover the fixed fields that precede iface */
2839 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2840 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/*
 * second check: only part of the condition is visible.  The log message
 * reports offsetof(..., iface) + pub->len as the size indata should have,
 * so this is presumably an exact-match test - TODO confirm.
 */
2844 ( offsetof(struct ctdb_control_ip_iface, iface)
2847 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2848 "but should be %u bytes\n",
2849 (unsigned)indata.dsize,
2850 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2854 /* walk over all public addresses until we find a match */
2855 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2856 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2857 TALLOC_CTX *mem_ctx;
/*
 * The entry is unlinked while the loop is still positioned on it.
 * Presumably the function returns from inside this branch so that
 * vnn->next is never followed afterwards - TODO confirm.
 */
2859 DLIST_REMOVE(ctdb->vnn, vnn);
/*
 * The handling of a vnn without an interface is not visible here;
 * presumably no release-ip event is needed in that case - TODO confirm.
 */
2860 if (vnn->iface == NULL) {
/*
 * mem_ctx serves both as the talloc context given to the event-script call
 * and as the private data that delete_ip_callback frees.
 * NOTE(review): no NULL check on the talloc_new() result is visible.
 */
2865 mem_ctx = talloc_new(ctdb);
2866 ret = ctdb_event_script_callback(ctdb,
2867 mem_ctx, delete_ip_callback, mem_ctx,
2869 CTDB_EVENT_RELEASE_IP,
2871 ctdb_vnn_iface_string(vnn),
2872 ctdb_addr_to_str(&vnn->public_address),
2873 vnn->public_netmask_bits);
/*
 * NOTE(review): the interface is unassigned before ret is examined in any
 * visible line - confirm that ret is checked afterwards.
 */
2874 ctdb_vnn_unassign_iface(ctdb, vnn);
2886 /* This function is called from the recovery daemon to verify that a remote
2887 node has the expected ip allocation.
2888 This is verified against ctdb->ip_tree
/*
 * Compare the public-IP assignment reported in ips with the expected
 * assignment stored in ctdb->ip_tree.
 *
 * A NULL ctdb->ip_tree is handled first (see the comment in that branch).
 * Otherwise each of the ips->num entries is looked up in ctdb->ip_tree with
 * trbt_lookuparray32(), keyed by ip_key() of the entry's address:
 *   - an address with no tree entry is logged at DEBUG_ERR;
 *   - entries where either the tree's pnn or the reported pnn equals -1 are
 *     tested separately before the comparison (the action taken is not
 *     visible here; presumably they are skipped - TODO confirm);
 *   - a pnn mismatch is logged at DEBUG_ERR as an inconsistent allocation.
 *
 * The return statements are not visible in these lines; presumably 0 means
 * "consistent" and non-zero requests a reallocation - TODO confirm.
 * NOTE(review): pnn is compared against -1 but printed with %u, so it is
 * presumably an unsigned field using -1 as "unassigned" - confirm.
 */
2890 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2892 struct ctdb_public_ip_list *tmp_ip;
2895 if (ctdb->ip_tree == NULL) {
2896 /* dont know the expected allocation yet, assume remote node
2905 for (i=0; i<ips->num; i++) {
2906 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2907 if (tmp_ip == NULL) {
2908 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2912 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2916 if (tmp_ip->pnn != ips->ips[i].pnn) {
2917 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record in ctdb->ip_tree that the address ip->addr is now held by node
 * ip->pnn.
 *
 * The entry is located with trbt_lookuparray32(), keyed by
 * ip_key(&ip->addr), and its pnn field is overwritten with ip->pnn.  A
 * missing ip_tree or a missing entry is logged at DEBUG_ERR.
 *
 * The return statements are not visible in these lines; presumably 0 on
 * success and negative on either failure - TODO confirm.
 */
2925 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2927 struct ctdb_public_ip_list *tmp_ip;
/* nothing can be updated before the expected-allocation tree exists */
2929 if (ctdb->ip_tree == NULL) {
2930 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2934 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2935 if (tmp_ip == NULL) {
2936 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* the old owner is logged before it is overwritten, so the message shows both nodes */
2940 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2941 tmp_ip->pnn = ip->pnn;