4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Look for an existing entry for this interface */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
420 struct ctdb_do_updateip_state {
421 struct ctdb_req_control *c;
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 state = talloc(vnn, struct ctdb_do_updateip_state);
494 CTDB_NO_MEMORY(ctdb, state);
496 state->c = talloc_steal(ctdb, c);
500 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501 "interface %s to %s\n",
502 ctdb_addr_to_str(&vnn->public_address),
503 vnn->public_netmask_bits,
505 ctdb_vnn_iface_string(vnn)));
507 ret = ctdb_event_script_callback(ctdb,
509 ctdb_do_updateip_callback,
512 CTDB_EVENT_UPDATE_IP,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
519 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520 ctdb_addr_to_str(&vnn->public_address),
521 old->name, ctdb_vnn_iface_string(vnn)));
530 Find the vnn entry for a public ip address
531 returns NULL if the address is not known as a public address
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 struct ctdb_vnn *vnn;
537 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538 if (ctdb_same_ip(&vnn->public_address, addr)) {
547 take over an ip address
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550 struct ctdb_req_control *c,
555 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556 struct ctdb_vnn *vnn;
557 bool have_ip = false;
558 bool do_updateip = false;
559 bool do_takeip = false;
560 struct ctdb_iface *best_iface = NULL;
562 if (pip->pnn != ctdb->pnn) {
563 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564 "with pnn %d, but we're node %d\n",
565 ctdb_addr_to_str(&pip->addr),
566 pip->pnn, ctdb->pnn));
570 /* update our vnn list */
571 vnn = find_public_ip_vnn(ctdb, &pip->addr);
573 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574 ctdb_addr_to_str(&pip->addr)));
578 have_ip = ctdb_sys_have_ip(&pip->addr);
579 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580 if (best_iface == NULL) {
581 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582 "a usable interface (old %s, have_ip %d)\n",
583 ctdb_addr_to_str(&vnn->public_address),
584 vnn->public_netmask_bits,
585 ctdb_vnn_iface_string(vnn),
590 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
595 if (vnn->iface == NULL && have_ip) {
596 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
598 ctdb_addr_to_str(&vnn->public_address)));
602 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
603 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
604 "and we have it on iface[%s], but it was assigned to node %d"
605 "and we are node %d, banning ourself\n",
606 ctdb_addr_to_str(&vnn->public_address),
607 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
612 if (vnn->pnn == -1 && have_ip) {
613 vnn->pnn = ctdb->pnn;
614 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615 "and we already have it on iface[%s], update local daemon\n",
616 ctdb_addr_to_str(&vnn->public_address),
617 ctdb_vnn_iface_string(vnn)));
622 if (vnn->iface->link_up) {
623 /* only move when the rebalance gains something */
624 if (vnn->iface->references > (best_iface->references + 1)) {
627 } else if (vnn->iface != best_iface) {
634 ctdb_vnn_unassign_iface(ctdb, vnn);
641 ret = ctdb_do_takeip(ctdb, c, vnn);
645 } else if (do_updateip) {
646 ret = ctdb_do_updateip(ctdb, c, vnn);
652 * The interface is up and the kernel knows the ip
655 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
656 ctdb_addr_to_str(&pip->addr),
657 vnn->public_netmask_bits,
658 ctdb_vnn_iface_string(vnn)));
662 /* tell ctdb_control.c that we will be replying asynchronously */
669 takeover an ip address old v4 style
671 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
672 struct ctdb_req_control *c,
678 data.dsize = sizeof(struct ctdb_public_ip);
679 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
680 CTDB_NO_MEMORY(ctdb, data.dptr);
682 memcpy(data.dptr, indata.dptr, indata.dsize);
683 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
687 kill any clients that are registered with an IP that is being released
689 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
691 struct ctdb_client_ip *ip;
693 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
694 ctdb_addr_to_str(addr)));
696 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
697 ctdb_sock_addr tmp_addr;
700 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
702 ctdb_addr_to_str(&ip->addr)));
704 if (ctdb_same_ip(&tmp_addr, addr)) {
705 struct ctdb_client *client = ctdb_reqid_find(ctdb,
708 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
710 ctdb_addr_to_str(&ip->addr),
713 if (client->pid != 0) {
714 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
715 (unsigned)client->pid,
716 ctdb_addr_to_str(addr),
718 kill(client->pid, SIGKILL);
725 called when releaseip event finishes
727 static void release_ip_callback(struct ctdb_context *ctdb, int status,
730 struct takeover_callback_state *state =
731 talloc_get_type(private_data, struct takeover_callback_state);
734 if (status == -ETIME) {
738 /* send a message to all clients of this node telling them
739 that the cluster has been reconfigured and they should
740 release any sockets on this IP */
741 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
742 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
743 data.dsize = strlen((char *)data.dptr)+1;
745 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
747 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
749 /* kill clients that have registered with this IP */
750 release_kill_clients(ctdb, state->addr);
752 ctdb_vnn_unassign_iface(ctdb, state->vnn);
754 /* the control succeeded */
755 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
760 release an ip address
762 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
763 struct ctdb_req_control *c,
768 struct takeover_callback_state *state;
769 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
770 struct ctdb_vnn *vnn;
772 /* update our vnn list */
773 vnn = find_public_ip_vnn(ctdb, &pip->addr);
775 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
776 ctdb_addr_to_str(&pip->addr)));
781 /* stop any previous arps */
782 talloc_free(vnn->takeover_ctx);
783 vnn->takeover_ctx = NULL;
785 if (!ctdb_sys_have_ip(&pip->addr)) {
786 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
787 ctdb_addr_to_str(&pip->addr),
788 vnn->public_netmask_bits,
789 ctdb_vnn_iface_string(vnn)));
790 ctdb_vnn_unassign_iface(ctdb, vnn);
794 if (vnn->iface == NULL) {
795 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
796 "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
797 ctdb_addr_to_str(&vnn->public_address)));
801 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
802 ctdb_addr_to_str(&pip->addr),
803 vnn->public_netmask_bits,
804 ctdb_vnn_iface_string(vnn),
807 state = talloc(ctdb, struct takeover_callback_state);
808 CTDB_NO_MEMORY(ctdb, state);
810 state->c = talloc_steal(state, c);
811 state->addr = talloc(state, ctdb_sock_addr);
812 CTDB_NO_MEMORY(ctdb, state->addr);
813 *state->addr = pip->addr;
816 ret = ctdb_event_script_callback(ctdb,
817 state, release_ip_callback, state,
819 CTDB_EVENT_RELEASE_IP,
821 ctdb_vnn_iface_string(vnn),
822 ctdb_addr_to_str(&pip->addr),
823 vnn->public_netmask_bits);
825 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
826 ctdb_addr_to_str(&pip->addr),
827 ctdb_vnn_iface_string(vnn)));
832 /* tell the control that we will be replying asynchronously */
838 release an ip address old v4 style
840 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
841 struct ctdb_req_control *c,
847 data.dsize = sizeof(struct ctdb_public_ip);
848 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
849 CTDB_NO_MEMORY(ctdb, data.dptr);
851 memcpy(data.dptr, indata.dptr, indata.dsize);
852 return ctdb_control_release_ip(ctdb, c, data, async_reply);
856 static int ctdb_add_public_address(struct ctdb_context *ctdb,
857 ctdb_sock_addr *addr,
858 unsigned mask, const char *ifaces)
860 struct ctdb_vnn *vnn;
867 /* Verify that we don't have an entry for this ip yet */
868 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
869 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
870 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
871 ctdb_addr_to_str(addr)));
876 /* create a new vnn structure for this ip address */
877 vnn = talloc_zero(ctdb, struct ctdb_vnn);
878 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
879 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
880 tmp = talloc_strdup(vnn, ifaces);
881 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
882 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
883 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
884 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
885 vnn->ifaces[num] = talloc_strdup(vnn, iface);
886 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
890 vnn->ifaces[num] = NULL;
891 vnn->public_address = *addr;
892 vnn->public_netmask_bits = mask;
894 if (ctdb_sys_have_ip(addr)) {
895 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
896 vnn->pnn = ctdb->pnn;
899 for (i=0; vnn->ifaces[i]; i++) {
900 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
902 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903 "for public_address[%s]\n",
904 vnn->ifaces[i], ctdb_addr_to_str(addr)));
909 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
913 DLIST_ADD(ctdb->vnn, vnn);
919 setup the event script directory
921 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
923 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
924 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
929 setup the public address lists from a file
931 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
937 lines = file_lines_load(alist, &nlines, ctdb);
939 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
942 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
946 for (i=0;i<nlines;i++) {
954 while ((*line == ' ') || (*line == '\t')) {
960 if (strcmp(line, "") == 0) {
963 tok = strtok(line, " \t");
965 tok = strtok(NULL, " \t");
967 if (NULL == ctdb->default_public_interface) {
968 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
973 ifaces = ctdb->default_public_interface;
978 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
979 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
983 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
984 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
994 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
998 struct ctdb_vnn *svnn;
999 struct ctdb_iface *cur = NULL;
1003 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1004 CTDB_NO_MEMORY(ctdb, svnn);
1006 svnn->ifaces = talloc_array(svnn, const char *, 2);
1007 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1008 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1009 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1010 svnn->ifaces[1] = NULL;
1012 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1018 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1020 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1021 "for single_ip[%s]\n",
1023 ctdb_addr_to_str(&svnn->public_address)));
1028 /* assume the single public ip interface is initially "good" */
1029 cur = ctdb_find_iface(ctdb, iface);
1031 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1034 cur->link_up = true;
1036 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1042 ctdb->single_ip_vnn = svnn;
1046 struct ctdb_public_ip_list {
1047 struct ctdb_public_ip_list *next;
1049 ctdb_sock_addr addr;
1053 /* Given a physical node, return the number of
1054 public addresses that are currently assigned to this node.
1056 static int node_ip_coverage(struct ctdb_context *ctdb,
1058 struct ctdb_public_ip_list *ips)
1062 for (;ips;ips=ips->next) {
1063 if (ips->pnn == pnn) {
1071 /* Check if this is a public ip known to the node, i.e. can that
1072 node take over this ip?
1074 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1075 struct ctdb_public_ip_list *ip)
1077 struct ctdb_all_public_ips *public_ips;
1080 public_ips = ctdb->nodes[pnn]->available_public_ips;
1082 if (public_ips == NULL) {
1086 for (i=0;i<public_ips->num;i++) {
1087 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1088 /* yes, this node can serve this public ip */
1097 /* search the node list for a node to take over this ip.
1098 pick the node that is currently serving the fewest ips
1099 so that the ips get spread out evenly.
1101 static int find_takeover_node(struct ctdb_context *ctdb,
1102 struct ctdb_node_map *nodemap, uint32_t mask,
1103 struct ctdb_public_ip_list *ip,
1104 struct ctdb_public_ip_list *all_ips)
1106 int pnn, min=0, num;
1110 for (i=0;i<nodemap->num;i++) {
1111 if (nodemap->nodes[i].flags & mask) {
1112 /* This node is not healthy and cannot be used to serve
1118 /* verify that this node can serve this ip */
1119 if (can_node_serve_ip(ctdb, i, ip)) {
1120 /* no it couldn't so skip to the next node */
1124 num = node_ip_coverage(ctdb, i, all_ips);
1125 /* was this the first node we checked ? */
1137 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1138 ctdb_addr_to_str(&ip->addr)));
1148 static uint32_t *ip_key(ctdb_sock_addr *ip)
1150 static uint32_t key[IP_KEYLEN];
1152 bzero(key, sizeof(key));
1154 switch (ip->sa.sa_family) {
1156 key[3] = htonl(ip->ip.sin_addr.s_addr);
1159 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1160 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1161 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1162 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1165 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1172 static void *add_ip_callback(void *parm, void *data)
1174 struct ctdb_public_ip_list *this_ip = parm;
1175 struct ctdb_public_ip_list *prev_ip = data;
1177 if (prev_ip == NULL) {
1180 if (this_ip->pnn == -1) {
1181 this_ip->pnn = prev_ip->pnn;
1187 void getips_count_callback(void *param, void *data)
1189 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1190 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1192 new_ip->next = *ip_list;
1196 static struct ctdb_public_ip_list *
1197 create_merged_ip_list(struct ctdb_context *ctdb)
1200 struct ctdb_public_ip_list *ip_list;
1201 struct ctdb_all_public_ips *public_ips;
1203 if (ctdb->ip_tree != NULL) {
1204 talloc_free(ctdb->ip_tree);
1205 ctdb->ip_tree = NULL;
1207 ctdb->ip_tree = trbt_create(ctdb, 0);
1209 for (i=0;i<ctdb->num_nodes;i++) {
1210 public_ips = ctdb->nodes[i]->known_public_ips;
1212 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1216 /* there were no public ips for this node */
1217 if (public_ips == NULL) {
1221 for (j=0;j<public_ips->num;j++) {
1222 struct ctdb_public_ip_list *tmp_ip;
1224 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1225 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1226 tmp_ip->pnn = public_ips->ips[j].pnn;
1227 tmp_ip->addr = public_ips->ips[j].addr;
1228 tmp_ip->next = NULL;
1230 trbt_insertarray32_callback(ctdb->ip_tree,
1231 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1238 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1244 make any IP alias changes for public addresses that are necessary
1246 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1248 int i, num_healthy, retries;
1249 struct ctdb_public_ip ip;
1250 struct ctdb_public_ipv4 ipv4;
1251 uint32_t mask, *nodes;
1252 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1253 int maxnode, maxnum=0, minnode, minnum=0, num;
1255 struct timeval timeout;
1256 struct client_async_data *async_data;
1257 struct ctdb_client_control_state *state;
1258 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1261 * ip failover is completely disabled, just send out the
1262 * ipreallocated event.
1264 if (ctdb->tunable.disable_ip_failover != 0) {
1270 /* Count how many completely healthy nodes we have */
1272 for (i=0;i<nodemap->num;i++) {
1273 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1278 if (num_healthy > 0) {
1279 /* We have healthy nodes, so only consider them for
1280 serving public addresses
1282 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1284 /* We didn't have any completely healthy nodes so
1285 use "disabled" nodes as a fallback
1287 mask = NODE_FLAGS_INACTIVE;
1290 /* since nodes only know about those public addresses that
1291 can be served by that particular node, no single node has
1292 a full list of all public addresses that exist in the cluster.
1293 Walk over all node structures and create a merged list of
1294 all public addresses that exist in the cluster.
1296 keep the tree of ips around as ctdb->ip_tree
1298 all_ips = create_merged_ip_list(ctdb);
1300 /* If we want deterministic ip allocations, i.e. that the ip addresses
1301 will always be allocated the same way for a specific set of
1302 available/unavailable nodes.
1304 if (1 == ctdb->tunable.deterministic_public_ips) {
1305 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1306 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1307 tmp_ip->pnn = i%nodemap->num;
1312 /* mark all public addresses with a masked node as being served by
1315 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1316 if (tmp_ip->pnn == -1) {
1319 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1324 /* verify that the assigned nodes can serve that public ip
1325 and set it to -1 if not
1327 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1328 if (tmp_ip->pnn == -1) {
1331 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1332 /* this node can not serve this ip. */
1338 /* now we must redistribute all public addresses with takeover node
1339 -1 among the nodes available
1343 /* loop over all ip's and find a physical node to cover for
1346 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1347 if (tmp_ip->pnn == -1) {
1348 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1349 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1350 ctdb_addr_to_str(&tmp_ip->addr)));
1355 /* If we don't want ips to fail back after a node becomes healthy
1356 again, we won't even try to reallocate the ip addresses so that
1357 they are evenly spread out.
1358 This can NOT be used at the same time as DeterministicIPs !
1360 if (1 == ctdb->tunable.no_ip_failback) {
1361 if (1 == ctdb->tunable.deterministic_public_ips) {
1362 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1368 /* now, try to make sure the ip addresses are evenly distributed
1370 for each ip address, loop over all nodes that can serve this
1371 ip and make sure that the difference between the node
1372 serving the most and the node serving the least ips is not greater
1375 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1376 if (tmp_ip->pnn == -1) {
1380 /* Get the highest and lowest number of ips served by any
1381 valid node which can serve this ip.
1385 for (i=0;i<nodemap->num;i++) {
1386 if (nodemap->nodes[i].flags & mask) {
1390 /* only check nodes that can actually serve this ip */
1391 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1392 /* no it couldn't so skip to the next node */
1396 num = node_ip_coverage(ctdb, i, all_ips);
1397 if (maxnode == -1) {
1406 if (minnode == -1) {
1416 if (maxnode == -1) {
1417 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1418 ctdb_addr_to_str(&tmp_ip->addr)));
1423 /* If we want deterministic IPs then don't try to reallocate
1424 them to spread out the load.
1426 if (1 == ctdb->tunable.deterministic_public_ips) {
1430 /* if the spread between the smallest and largest coverage by
1431 a node is >=2 we steal one of the ips from the node with
1432 most coverage to even things out a bit.
1433 try to do this at most 5 times since we don't want to spend
1434 too much time balancing the ip coverage.
1436 if ( (maxnum > minnum+1)
1438 struct ctdb_public_ip_list *tmp;
1440 /* mark one of maxnode's vnn's as unassigned and try
1443 for (tmp=all_ips;tmp;tmp=tmp->next) {
1444 if (tmp->pnn == maxnode) {
1454 /* finished distributing the public addresses, now just send the
1455 info out to the nodes
1459 /* at this point ->pnn is the node which will own each IP
1460 or -1 if there is no node that can cover this ip
1463 /* now tell all nodes to delete any alias that they should not
1464 have. This will be a NOOP on nodes that don't currently
1465 hold the given alias */
1466 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1467 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1469 for (i=0;i<nodemap->num;i++) {
1470 /* don't talk to unconnected nodes, but do talk to banned nodes */
1471 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1475 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1476 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1477 /* This node should be serving this
1478 vnn so don't tell it to release the ip
1482 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1483 ipv4.pnn = tmp_ip->pnn;
1484 ipv4.sin = tmp_ip->addr.ip;
1486 timeout = TAKEOVER_TIMEOUT();
1487 data.dsize = sizeof(ipv4);
1488 data.dptr = (uint8_t *)&ipv4;
1489 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1490 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1494 ip.pnn = tmp_ip->pnn;
1495 ip.addr = tmp_ip->addr;
1497 timeout = TAKEOVER_TIMEOUT();
1498 data.dsize = sizeof(ip);
1499 data.dptr = (uint8_t *)&ip;
1500 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1501 0, CTDB_CONTROL_RELEASE_IP, 0,
1506 if (state == NULL) {
1507 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1508 talloc_free(tmp_ctx);
1512 ctdb_client_async_add(async_data, state);
1515 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1516 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1517 talloc_free(tmp_ctx);
1520 talloc_free(async_data);
1523 /* tell all nodes to get their own IPs */
1524 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1525 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1526 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1527 if (tmp_ip->pnn == -1) {
1528 /* this IP won't be taken over */
1532 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1533 ipv4.pnn = tmp_ip->pnn;
1534 ipv4.sin = tmp_ip->addr.ip;
1536 timeout = TAKEOVER_TIMEOUT();
1537 data.dsize = sizeof(ipv4);
1538 data.dptr = (uint8_t *)&ipv4;
1539 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1540 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1544 ip.pnn = tmp_ip->pnn;
1545 ip.addr = tmp_ip->addr;
1547 timeout = TAKEOVER_TIMEOUT();
1548 data.dsize = sizeof(ip);
1549 data.dptr = (uint8_t *)&ip;
1550 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1551 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1555 if (state == NULL) {
1556 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1557 talloc_free(tmp_ctx);
1561 ctdb_client_async_add(async_data, state);
1563 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1564 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1565 talloc_free(tmp_ctx);
1570 /* tell all nodes to update natwg */
1571 /* send the flags update natgw on all connected nodes */
1572 data.dptr = discard_const("ipreallocated");
1573 data.dsize = strlen((char *)data.dptr) + 1;
1574 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1575 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1576 nodes, 0, TAKEOVER_TIMEOUT(),
1580 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1583 talloc_free(tmp_ctx);
1589 destroy a ctdb_client_ip structure
1591 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1593 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1594 ctdb_addr_to_str(&ip->addr),
1595 ntohs(ip->addr.ip.sin_port),
1598 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1603 called by a client to inform us of a TCP connection that it is managing
1604 that should tickled with an ACK when IP takeover is done
1605 we handle both the old ipv4 style of packets as well as the new ipv4/6
1608 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1611 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1612 struct ctdb_control_tcp *old_addr = NULL;
1613 struct ctdb_control_tcp_addr new_addr;
1614 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1615 struct ctdb_tcp_list *tcp;
1616 struct ctdb_tcp_connection t;
1619 struct ctdb_client_ip *ip;
1620 struct ctdb_vnn *vnn;
1621 ctdb_sock_addr addr;
1623 switch (indata.dsize) {
1624 case sizeof(struct ctdb_control_tcp):
1625 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1626 ZERO_STRUCT(new_addr);
1627 tcp_sock = &new_addr;
1628 tcp_sock->src.ip = old_addr->src;
1629 tcp_sock->dest.ip = old_addr->dest;
1631 case sizeof(struct ctdb_control_tcp_addr):
1632 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1635 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1636 "to ctdb_control_tcp_client. size was %d but "
1637 "only allowed sizes are %lu and %lu\n",
1639 (long unsigned)sizeof(struct ctdb_control_tcp),
1640 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1644 addr = tcp_sock->src;
1645 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1646 addr = tcp_sock->dest;
1647 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1650 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1651 vnn = find_public_ip_vnn(ctdb, &addr);
1653 switch (addr.sa.sa_family) {
1655 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1656 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1657 ctdb_addr_to_str(&addr)));
1661 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1662 ctdb_addr_to_str(&addr)));
1665 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1671 if (vnn->pnn != ctdb->pnn) {
1672 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1673 ctdb_addr_to_str(&addr),
1674 client_id, client->pid));
1675 /* failing this call will tell smbd to die */
1679 ip = talloc(client, struct ctdb_client_ip);
1680 CTDB_NO_MEMORY(ctdb, ip);
1684 ip->client_id = client_id;
1685 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1686 DLIST_ADD(ctdb->client_ip_list, ip);
1688 tcp = talloc(client, struct ctdb_tcp_list);
1689 CTDB_NO_MEMORY(ctdb, tcp);
1691 tcp->connection.src_addr = tcp_sock->src;
1692 tcp->connection.dst_addr = tcp_sock->dest;
1694 DLIST_ADD(client->tcp_list, tcp);
1696 t.src_addr = tcp_sock->src;
1697 t.dst_addr = tcp_sock->dest;
1699 data.dptr = (uint8_t *)&t;
1700 data.dsize = sizeof(t);
1702 switch (addr.sa.sa_family) {
1704 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1705 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1706 ctdb_addr_to_str(&tcp_sock->src),
1707 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1710 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1711 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1712 ctdb_addr_to_str(&tcp_sock->src),
1713 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1716 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1720 /* tell all nodes about this tcp connection */
1721 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1722 CTDB_CONTROL_TCP_ADD,
1723 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1725 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1733 find a tcp address on a list
1735 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1736 struct ctdb_tcp_connection *tcp)
1740 if (array == NULL) {
1744 for (i=0;i<array->num;i++) {
1745 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1746 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1747 return &array->connections[i];
1756 called by a daemon to inform us of a TCP connection that one of its
1757 clients managing that should tickled with an ACK when IP takeover is
1760 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1762 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1763 struct ctdb_tcp_array *tcparray;
1764 struct ctdb_tcp_connection tcp;
1765 struct ctdb_vnn *vnn;
1767 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1769 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1770 ctdb_addr_to_str(&p->dst_addr)));
1776 tcparray = vnn->tcp_array;
1778 /* If this is the first tickle */
1779 if (tcparray == NULL) {
1780 tcparray = talloc_size(ctdb->nodes,
1781 offsetof(struct ctdb_tcp_array, connections) +
1782 sizeof(struct ctdb_tcp_connection) * 1);
1783 CTDB_NO_MEMORY(ctdb, tcparray);
1784 vnn->tcp_array = tcparray;
1787 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1788 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1790 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1791 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1794 if (tcp_update_needed) {
1795 vnn->tcp_update_needed = true;
1801 /* Do we already have this tickle ?*/
1802 tcp.src_addr = p->src_addr;
1803 tcp.dst_addr = p->dst_addr;
1804 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1805 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1806 ctdb_addr_to_str(&tcp.dst_addr),
1807 ntohs(tcp.dst_addr.ip.sin_port),
1812 /* A new tickle, we must add it to the array */
1813 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1814 struct ctdb_tcp_connection,
1816 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1818 vnn->tcp_array = tcparray;
1819 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1820 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1823 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1824 ctdb_addr_to_str(&tcp.dst_addr),
1825 ntohs(tcp.dst_addr.ip.sin_port),
1828 if (tcp_update_needed) {
1829 vnn->tcp_update_needed = true;
1837 called by a daemon to inform us of a TCP connection that one of its
1838 clients managing that should tickled with an ACK when IP takeover is
1841 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1843 struct ctdb_tcp_connection *tcpp;
1844 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1847 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1848 ctdb_addr_to_str(&conn->dst_addr)));
1852 /* if the array is empty we cant remove it
1853 and we dont need to do anything
1855 if (vnn->tcp_array == NULL) {
1856 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1857 ctdb_addr_to_str(&conn->dst_addr),
1858 ntohs(conn->dst_addr.ip.sin_port)));
1863 /* See if we know this connection
1864 if we dont know this connection then we dont need to do anything
1866 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1868 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1869 ctdb_addr_to_str(&conn->dst_addr),
1870 ntohs(conn->dst_addr.ip.sin_port)));
1875 /* We need to remove this entry from the array.
1876 Instead of allocating a new array and copying data to it
1877 we cheat and just copy the last entry in the existing array
1878 to the entry that is to be removed and just shring the
1881 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1882 vnn->tcp_array->num--;
1884 /* If we deleted the last entry we also need to remove the entire array
1886 if (vnn->tcp_array->num == 0) {
1887 talloc_free(vnn->tcp_array);
1888 vnn->tcp_array = NULL;
1891 vnn->tcp_update_needed = true;
1893 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1894 ctdb_addr_to_str(&conn->src_addr),
1895 ntohs(conn->src_addr.ip.sin_port)));
1900 called by a daemon to inform us of a TCP connection that one of its
1901 clients used are no longer needed in the tickle database
1903 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1905 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1907 ctdb_remove_tcp_connection(ctdb, conn);
/*
  called when a daemon restarts - send all tickles for all public addresses
  we are serving immediately to the new node.

  Currently a stub: nothing is sent and 0 is returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	/*XXX here we should send all tickles we are serving to the new node */
	return 0;
}
1925 called when a client structure goes away - hook to remove
1926 elements from the tcp_list in all daemons
1928 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1930 while (client->tcp_list) {
1931 struct ctdb_tcp_list *tcp = client->tcp_list;
1932 DLIST_REMOVE(client->tcp_list, tcp);
1933 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1939 release all IPs on shutdown
1941 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1943 struct ctdb_vnn *vnn;
1945 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1946 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1947 ctdb_vnn_unassign_iface(ctdb, vnn);
1953 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1954 ctdb_vnn_iface_string(vnn),
1955 ctdb_addr_to_str(&vnn->public_address),
1956 vnn->public_netmask_bits);
1957 release_kill_clients(ctdb, &vnn->public_address);
1958 ctdb_vnn_unassign_iface(ctdb, vnn);
1964 get list of public IPs
1966 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1967 struct ctdb_req_control *c, TDB_DATA *outdata)
1970 struct ctdb_all_public_ips *ips;
1971 struct ctdb_vnn *vnn;
1972 bool only_available = false;
1974 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1975 only_available = true;
1978 /* count how many public ip structures we have */
1980 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1984 len = offsetof(struct ctdb_all_public_ips, ips) +
1985 num*sizeof(struct ctdb_public_ip);
1986 ips = talloc_zero_size(outdata, len);
1987 CTDB_NO_MEMORY(ctdb, ips);
1990 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1991 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1994 ips->ips[i].pnn = vnn->pnn;
1995 ips->ips[i].addr = vnn->public_address;
1999 len = offsetof(struct ctdb_all_public_ips, ips) +
2000 i*sizeof(struct ctdb_public_ip);
2002 outdata->dsize = len;
2003 outdata->dptr = (uint8_t *)ips;
2010 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2012 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2013 struct ctdb_req_control *c, TDB_DATA *outdata)
2016 struct ctdb_all_public_ipsv4 *ips;
2017 struct ctdb_vnn *vnn;
2019 /* count how many public ip structures we have */
2021 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2022 if (vnn->public_address.sa.sa_family != AF_INET) {
2028 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2029 num*sizeof(struct ctdb_public_ipv4);
2030 ips = talloc_zero_size(outdata, len);
2031 CTDB_NO_MEMORY(ctdb, ips);
2033 outdata->dsize = len;
2034 outdata->dptr = (uint8_t *)ips;
2038 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2039 if (vnn->public_address.sa.sa_family != AF_INET) {
2042 ips->ips[i].pnn = vnn->pnn;
2043 ips->ips[i].sin = vnn->public_address.ip;
2050 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2051 struct ctdb_req_control *c,
2056 ctdb_sock_addr *addr;
2057 struct ctdb_control_public_ip_info *info;
2058 struct ctdb_vnn *vnn;
2060 addr = (ctdb_sock_addr *)indata.dptr;
2062 vnn = find_public_ip_vnn(ctdb, addr);
2064 /* if it is not a public ip it could be our 'single ip' */
2065 if (ctdb->single_ip_vnn) {
2066 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2067 vnn = ctdb->single_ip_vnn;
2072 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2073 "'%s'not a public address\n",
2074 ctdb_addr_to_str(addr)));
2078 /* count how many public ip structures we have */
2080 for (;vnn->ifaces[num];) {
2084 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2085 num*sizeof(struct ctdb_control_iface_info);
2086 info = talloc_zero_size(outdata, len);
2087 CTDB_NO_MEMORY(ctdb, info);
2089 info->ip.addr = vnn->public_address;
2090 info->ip.pnn = vnn->pnn;
2091 info->active_idx = 0xFFFFFFFF;
2093 for (i=0; vnn->ifaces[i]; i++) {
2094 struct ctdb_iface *cur;
2096 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2098 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2102 if (vnn->iface == cur) {
2103 info->active_idx = i;
2105 strcpy(info->ifaces[i].name, cur->name);
2106 info->ifaces[i].link_state = cur->link_up;
2107 info->ifaces[i].references = cur->references;
2110 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2111 i*sizeof(struct ctdb_control_iface_info);
2113 outdata->dsize = len;
2114 outdata->dptr = (uint8_t *)info;
2119 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2120 struct ctdb_req_control *c,
2124 struct ctdb_control_get_ifaces *ifaces;
2125 struct ctdb_iface *cur;
2127 /* count how many public ip structures we have */
2129 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2133 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2134 num*sizeof(struct ctdb_control_iface_info);
2135 ifaces = talloc_zero_size(outdata, len);
2136 CTDB_NO_MEMORY(ctdb, ifaces);
2139 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2140 strcpy(ifaces->ifaces[i].name, cur->name);
2141 ifaces->ifaces[i].link_state = cur->link_up;
2142 ifaces->ifaces[i].references = cur->references;
2146 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2147 i*sizeof(struct ctdb_control_iface_info);
2149 outdata->dsize = len;
2150 outdata->dptr = (uint8_t *)ifaces;
2155 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2156 struct ctdb_req_control *c,
2159 struct ctdb_control_iface_info *info;
2160 struct ctdb_iface *iface;
2161 bool link_up = false;
2163 info = (struct ctdb_control_iface_info *)indata.dptr;
2165 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2166 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2167 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2168 len, len, info->name));
2172 switch (info->link_state) {
2180 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2181 (unsigned int)info->link_state));
2185 if (info->references != 0) {
2186 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2187 (unsigned int)info->references));
2191 iface = ctdb_find_iface(ctdb, info->name);
2192 if (iface == NULL) {
2193 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2198 if (link_up == iface->link_up) {
2202 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2203 ("iface[%s] has changed it's link status %s => %s\n",
2205 iface->link_up?"up":"down",
2206 link_up?"up":"down"));
2208 iface->link_up = link_up;
2214 structure containing the listening socket and the list of tcp connections
2215 that the ctdb daemon is to kill
2217 struct ctdb_kill_tcp {
2218 struct ctdb_vnn *vnn;
2219 struct ctdb_context *ctdb;
2221 struct fd_event *fde;
2222 trbt_tree_t *connections;
2227 a tcp connection that is to be killed
2229 struct ctdb_killtcp_con {
2230 ctdb_sock_addr src_addr;
2231 ctdb_sock_addr dst_addr;
2233 struct ctdb_kill_tcp *killtcp;
2236 /* this function is used to create a key to represent this socketpair
2237 in the killtcp tree.
2238 this key is used to insert and lookup matching socketpairs that are
2239 to be tickled and RST
2241 #define KILLTCP_KEYLEN 10
2242 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2244 static uint32_t key[KILLTCP_KEYLEN];
2246 bzero(key, sizeof(key));
2248 if (src->sa.sa_family != dst->sa.sa_family) {
2249 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2253 switch (src->sa.sa_family) {
2255 key[0] = dst->ip.sin_addr.s_addr;
2256 key[1] = src->ip.sin_addr.s_addr;
2257 key[2] = dst->ip.sin_port;
2258 key[3] = src->ip.sin_port;
2261 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2262 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2263 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2264 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2265 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2266 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2267 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2268 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2269 key[8] = dst->ip6.sin6_port;
2270 key[9] = src->ip6.sin6_port;
2273 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2281 called when we get a read event on the raw socket
2283 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2284 uint16_t flags, void *private_data)
2286 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2287 struct ctdb_killtcp_con *con;
2288 ctdb_sock_addr src, dst;
2289 uint32_t ack_seq, seq;
2291 if (!(flags & EVENT_FD_READ)) {
2295 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2296 killtcp->private_data,
2298 &ack_seq, &seq) != 0) {
2299 /* probably a non-tcp ACK packet */
2303 /* check if we have this guy in our list of connections
2306 con = trbt_lookuparray32(killtcp->connections,
2307 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2309 /* no this was some other packet we can just ignore */
2313 /* This one has been tickled !
2314 now reset him and remove him from the list.
2316 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2317 ntohs(con->dst_addr.ip.sin_port),
2318 ctdb_addr_to_str(&con->src_addr),
2319 ntohs(con->src_addr.ip.sin_port)));
2321 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2326 /* when traversing the list of all tcp connections to send tickle acks to
2327 (so that we can capture the ack coming back and kill the connection
2329 this callback is called for each connection we are currently trying to kill
2331 static void tickle_connection_traverse(void *param, void *data)
2333 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2335 /* have tried too many times, just give up */
2336 if (con->count >= 5) {
2337 /* can't delete in traverse: reparent to delete_cons */
2338 talloc_steal(param, con);
2342 /* othervise, try tickling it again */
2345 (ctdb_sock_addr *)&con->dst_addr,
2346 (ctdb_sock_addr *)&con->src_addr,
2352 called every second until all sentenced connections have been reset
2354 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2355 struct timeval t, void *private_data)
2357 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2358 void *delete_cons = talloc_new(NULL);
2360 /* loop over all connections sending tickle ACKs */
2361 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2363 /* now we've finished traverse, it's safe to do deletion. */
2364 talloc_free(delete_cons);
2366 /* If there are no more connections to kill we can remove the
2367 entire killtcp structure
2369 if ( (killtcp->connections == NULL) ||
2370 (killtcp->connections->root == NULL) ) {
2371 talloc_free(killtcp);
2375 /* try tickling them again in a seconds time
2377 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2378 ctdb_tickle_sentenced_connections, killtcp);
2382 destroy the killtcp structure
2384 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2387 killtcp->vnn->killtcp = NULL;
/* nothing fancy here, just unconditionally replace any existing
   connection structure with the new one.

   don't even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new data
   is talloc_free()d

   parm is the new entry, data the existing one (ignored); returns parm.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	return parm;
}
2406 add a tcp socket to the list of connections we want to RST
2408 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2412 ctdb_sock_addr src, dst;
2413 struct ctdb_kill_tcp *killtcp;
2414 struct ctdb_killtcp_con *con;
2415 struct ctdb_vnn *vnn;
2417 ctdb_canonicalize_ip(s, &src);
2418 ctdb_canonicalize_ip(d, &dst);
2420 vnn = find_public_ip_vnn(ctdb, &dst);
2422 vnn = find_public_ip_vnn(ctdb, &src);
2425 /* if it is not a public ip it could be our 'single ip' */
2426 if (ctdb->single_ip_vnn) {
2427 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2428 vnn = ctdb->single_ip_vnn;
2433 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2437 killtcp = vnn->killtcp;
2439 /* If this is the first connection to kill we must allocate
2442 if (killtcp == NULL) {
2443 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2444 CTDB_NO_MEMORY(ctdb, killtcp);
2447 killtcp->ctdb = ctdb;
2448 killtcp->capture_fd = -1;
2449 killtcp->connections = trbt_create(killtcp, 0);
2451 vnn->killtcp = killtcp;
2452 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2457 /* create a structure that describes this connection we want to
2458 RST and store it in killtcp->connections
2460 con = talloc(killtcp, struct ctdb_killtcp_con);
2461 CTDB_NO_MEMORY(ctdb, con);
2462 con->src_addr = src;
2463 con->dst_addr = dst;
2465 con->killtcp = killtcp;
2468 trbt_insertarray32_callback(killtcp->connections,
2469 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2470 add_killtcp_callback, con);
2473 If we dont have a socket to listen on yet we must create it
2475 if (killtcp->capture_fd == -1) {
2476 const char *iface = ctdb_vnn_iface_string(vnn);
2477 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2478 if (killtcp->capture_fd == -1) {
2479 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2480 "socket on iface '%s' for killtcp (%s)\n",
2481 iface, strerror(errno)));
2487 if (killtcp->fde == NULL) {
2488 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2490 capture_tcp_handler, killtcp);
2491 tevent_fd_set_auto_close(killtcp->fde);
2493 /* We also need to set up some events to tickle all these connections
2494 until they are all reset
2496 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2497 ctdb_tickle_sentenced_connections, killtcp);
2500 /* tickle him once now */
2509 talloc_free(vnn->killtcp);
2510 vnn->killtcp = NULL;
2515 kill a TCP connection.
2517 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2519 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2521 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2525 called by a daemon to inform us of the entire list of TCP tickles for
2526 a particular public address.
2527 this control should only be sent by the node that is currently serving
2528 that public address.
2530 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2532 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2533 struct ctdb_tcp_array *tcparray;
2534 struct ctdb_vnn *vnn;
2536 /* We must at least have tickles.num or else we cant verify the size
2537 of the received data blob
2539 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2540 tickles.connections)) {
2541 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2545 /* verify that the size of data matches what we expect */
2546 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2547 tickles.connections)
2548 + sizeof(struct ctdb_tcp_connection)
2549 * list->tickles.num) {
2550 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2554 vnn = find_public_ip_vnn(ctdb, &list->addr);
2556 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2557 ctdb_addr_to_str(&list->addr)));
2562 /* remove any old ticklelist we might have */
2563 talloc_free(vnn->tcp_array);
2564 vnn->tcp_array = NULL;
2566 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2567 CTDB_NO_MEMORY(ctdb, tcparray);
2569 tcparray->num = list->tickles.num;
2571 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2572 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2574 memcpy(tcparray->connections, &list->tickles.connections[0],
2575 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2577 /* We now have a new fresh tickle list array for this vnn */
2578 vnn->tcp_array = talloc_steal(vnn, tcparray);
2584 called to return the full list of tickles for the puclic address associated
2585 with the provided vnn
2587 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2589 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2590 struct ctdb_control_tcp_tickle_list *list;
2591 struct ctdb_tcp_array *tcparray;
2593 struct ctdb_vnn *vnn;
2595 vnn = find_public_ip_vnn(ctdb, addr);
2597 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2598 ctdb_addr_to_str(addr)));
2603 tcparray = vnn->tcp_array;
2605 num = tcparray->num;
2610 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2611 tickles.connections)
2612 + sizeof(struct ctdb_tcp_connection) * num;
2614 outdata->dptr = talloc_size(outdata, outdata->dsize);
2615 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2616 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2619 list->tickles.num = num;
2621 memcpy(&list->tickles.connections[0], tcparray->connections,
2622 sizeof(struct ctdb_tcp_connection) * num);
2630 set the list of all tcp tickles for a public address
2632 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2633 struct timeval timeout, uint32_t destnode,
2634 ctdb_sock_addr *addr,
2635 struct ctdb_tcp_array *tcparray)
2639 struct ctdb_control_tcp_tickle_list *list;
2642 num = tcparray->num;
2647 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2648 tickles.connections) +
2649 sizeof(struct ctdb_tcp_connection) * num;
2650 data.dptr = talloc_size(ctdb, data.dsize);
2651 CTDB_NO_MEMORY(ctdb, data.dptr);
2653 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2655 list->tickles.num = num;
2657 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2660 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2661 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2662 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2664 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2668 talloc_free(data.dptr);
2675 perform tickle updates if required
2677 static void ctdb_update_tcp_tickles(struct event_context *ev,
2678 struct timed_event *te,
2679 struct timeval t, void *private_data)
2681 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2683 struct ctdb_vnn *vnn;
2685 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2686 /* we only send out updates for public addresses that
2689 if (ctdb->pnn != vnn->pnn) {
2692 /* We only send out the updates if we need to */
2693 if (!vnn->tcp_update_needed) {
2696 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2698 CTDB_BROADCAST_CONNECTED,
2699 &vnn->public_address,
2702 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2703 ctdb_addr_to_str(&vnn->public_address)));
2707 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2708 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2709 ctdb_update_tcp_tickles, ctdb);
2714 start periodic update of tcp tickles
2716 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2718 ctdb->tickle_update_context = talloc_new(ctdb);
2720 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2721 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2722 ctdb_update_tcp_tickles, ctdb);
2728 struct control_gratious_arp {
2729 struct ctdb_context *ctdb;
2730 ctdb_sock_addr addr;
2736 send a control_gratuitous arp
2738 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2739 struct timeval t, void *private_data)
2742 struct control_gratious_arp *arp = talloc_get_type(private_data,
2743 struct control_gratious_arp);
2745 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2747 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2748 arp->iface, strerror(errno)));
2753 if (arp->count == CTDB_ARP_REPEAT) {
2758 event_add_timed(arp->ctdb->ev, arp,
2759 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2760 send_gratious_arp, arp);
2767 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2769 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2770 struct control_gratious_arp *arp;
2772 /* verify the size of indata */
2773 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2774 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2775 (unsigned)indata.dsize,
2776 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2780 ( offsetof(struct ctdb_control_gratious_arp, iface)
2781 + gratious_arp->len ) ){
2783 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2784 "but should be %u bytes\n",
2785 (unsigned)indata.dsize,
2786 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2791 arp = talloc(ctdb, struct control_gratious_arp);
2792 CTDB_NO_MEMORY(ctdb, arp);
2795 arp->addr = gratious_arp->addr;
2796 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2797 CTDB_NO_MEMORY(ctdb, arp->iface);
2800 event_add_timed(arp->ctdb->ev, arp,
2801 timeval_zero(), send_gratious_arp, arp);
2806 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2808 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2811 /* verify the size of indata */
2812 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2813 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2817 ( offsetof(struct ctdb_control_ip_iface, iface)
2820 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2821 "but should be %u bytes\n",
2822 (unsigned)indata.dsize,
2823 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2827 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2830 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
/*
  called when releaseip event finishes for del_public_address

  private_data is the talloc context that was passed along when the event
  was started; it is released here.  ctdb and status are not used.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
				void *private_data)
{
	talloc_free(private_data);
}
/*
  Control handler: remove a public address from this node.

  indata is interpreted as a struct ctdb_control_ip_iface and its size is
  checked against the fixed header plus pub->len.  The list at ctdb->vnn
  is then searched for an entry whose public_address matches pub->addr;
  the match is unlinked from the list and, when it has an interface
  assigned, a CTDB_EVENT_RELEASE_IP event script is started for it before
  the interface is unassigned.

  NOTE(review): the embedded line numbering skips in several places
  inside this function, so some statements (declarations, return paths,
  part of the ctdb_event_script_callback() argument list) appear to be
  absent from this copy - confirm against the full file before editing.
 */
2846 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2848 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2849 struct ctdb_vnn *vnn;
2852 /* verify the size of indata */
/* first check: indata must at least cover the fixed part that precedes iface[] */
2853 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2854 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/* second check: the expected total is the fixed part plus pub->len bytes */
2858 ( offsetof(struct ctdb_control_ip_iface, iface)
2861 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2862 "but should be %u bytes\n",
2863 (unsigned)indata.dsize,
2864 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2868 /* walk over all public addresses until we find a match */
2869 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2870 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2871 TALLOC_CTX *mem_ctx;
/* unlink the entry from ctdb->vnn before anything else is done with it */
2873 DLIST_REMOVE(ctdb->vnn, vnn);
/* an entry without an assigned interface is handled separately */
2874 if (vnn->iface == NULL) {
/* mem_ctx is passed both as the memory context of the event and as the
   private data of delete_ip_callback, which frees it */
2879 mem_ctx = talloc_new(ctdb);
2880 ret = ctdb_event_script_callback(ctdb,
2881 mem_ctx, delete_ip_callback, mem_ctx,
2883 CTDB_EVENT_RELEASE_IP,
2885 ctdb_vnn_iface_string(vnn),
2886 ctdb_addr_to_str(&vnn->public_address),
2887 vnn->public_netmask_bits);
/* the interface is detached only after the event arguments above were built from vnn */
2888 ctdb_vnn_unassign_iface(ctdb, vnn);
2900 /* This function is called from the recovery daemon to verify that a remote
2901 node has the expected ip allocation.
2902 This is verified against ctdb->ip_tree
2904 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2906 struct ctdb_public_ip_list *tmp_ip;
2909 if (ctdb->ip_tree == NULL) {
2910 /* dont know the expected allocation yet, assume remote node
2919 for (i=0; i<ips->num; i++) {
2920 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2921 if (tmp_ip == NULL) {
2922 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2926 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2930 if (tmp_ip->pnn != ips->ips[i].pnn) {
2931 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2939 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2941 struct ctdb_public_ip_list *tmp_ip;
2943 if (ctdb->ip_tree == NULL) {
2944 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2948 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2949 if (tmp_ip == NULL) {
2950 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2954 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2955 tmp_ip->pnn = ip->pnn;