4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
/* Timeout used for takeover related controls: timeval_current_ofs() applied
   to the takeover_timeout tunable. The expansion refers to a variable named
   ctdb, so one must be in scope wherever this macro is used. */
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* first argument to timeval_current_ofs() when ctdb_control_send_arp() re-arms itself */
33 #define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() compares arp->count against this value */
34 #define CTDB_ARP_REPEAT 3
/* list linkage; entries are chained onto ctdb->ifaces with DLIST_ADD */
37 struct ctdb_iface *prev, *next;
/*
  Return the name of the interface assigned to a public address.
  NOTE(review): this dereferences vnn->iface, while other code in this file
  (e.g. ctdb_vnn_unassign_iface) treats vnn->iface as possibly NULL -
  confirm that case is handled before this return.
*/
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
/*
  Add an interface name to the ctdb->ifaces list.
  The list is scanned for an entry with the same name before a new zeroed
  ctdb_iface is allocated on ctdb, given its own copy of the name and linked
  in with DLIST_ADD.
  NOTE(review): presumably an already known name is not added a second time -
  confirm what is returned in that case.
*/
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Look for an existing entry with this interface name */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
/* the name copy is allocated as a talloc child of the new entry */
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
/*
  Look up an interface by name in the ctdb->ifaces list.
  NOTE(review): presumably returns the matching entry, or NULL when there is
  none - confirm.
*/
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Search for the entry with this interface name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
/*
  Choose an interface for a public address from its NULL terminated list of
  interface names (vnn->ifaces). Each name is resolved with ctdb_find_iface()
  and candidates are compared by their references count.
  NOTE(review): presumably returns NULL when no candidate qualifies - callers
  check the result against NULL.
*/
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/* a candidate with fewer references than the current best */
113 if (cur->references < best->references) {
/*
  Assign an interface to a public address.
  Visible flow: a "still assigned" debug message for an address that keeps
  its interface, a lookup through ctdb_vnn_best_iface(), an error message
  when no interface can be assigned, then vnn->pnn is set to this node's pnn
  and the new assignment is logged.
  NOTE(review): the return convention is not documented; ctdb_do_takeip()
  and ctdb_do_updateip() log a failure based on the result - confirm.
*/
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
/* record this node as the one holding the address */
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
/*
  Drop the interface assignment of a public address.
  Logs the old interface name and its reference count (0 is logged when
  vnn->iface is NULL) and decrements the references counter of the interface.
  NOTE(review): confirm the decrement is guarded against vnn->iface == NULL.
*/
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
/* extra handling when this node is recorded as the holder of the address */
167 if (vnn->pnn == ctdb->pnn) {
/*
  Check whether a public address has a usable interface.
  The currently assigned interface is tested first (it must exist and have
  link_up set); after that every name in vnn->ifaces is resolved through
  ctdb_find_iface().
  NOTE(review): presumably true is returned as soon as one interface with
  link up is found - confirm.
*/
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
/*
  state handed as private_data to ctdb_control_send_arp()
*/
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
/* tcp connections to send tickle acks for; taken over from vnn->tcp_array */
201 struct ctdb_tcp_array *tcparray;
/* the public address being announced */
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
/* doubly linked list node holding one tcp connection */
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
/* list node; release_kill_clients() walks these starting at ctdb->client_ip_list */
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
/*
  Timed event handler; private_data is a struct ctdb_takeover_arp.
  Sends an ARP for arp->addr on the interface of the vnn, then a tcp tickle
  ack for every connection in arp->tcparray, and finally re-arms itself on
  arp->vnn->takeover_ctx.
  NOTE(review): arp->count is compared against CTDB_ARP_REPEAT, presumably to
  stop repeating after that many rounds - confirm.
*/
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
/* NOTE(review): confirm tcparray is checked for NULL before this loop */
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
/* schedule the next round; the event lives on the takeover_ctx of the vnn */
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
/*
  Start announcing a public address on its interface.
  Creates vnn->takeover_ctx when it does not exist yet, allocates a
  ctdb_takeover_arp on it, moves the tcp connection array of the vnn over to
  that state and schedules ctdb_control_send_arp() with a zero timeout.
*/
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
/* the array now belongs to arp; the vnn starts over with an empty one */
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
/* first announcement is fired without delay */
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
/* state handed as private_data to release_ip_callback() */
314 struct takeover_callback_state {
/* the control request that is answered from the callback */
315 struct ctdb_req_control *c;
/* copy of the address being released */
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
/* state handed as private_data to ctdb_do_takeip_callback() */
320 struct ctdb_do_takeip_state {
/* the control request that is answered from the callback */
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
/*
  Completion handler for the event started by ctdb_do_takeip(); private_data
  is the struct ctdb_do_takeip_state allocated there.
  On failure the error is logged and status is sent back as the reply to the
  control. Otherwise the address is announced through
  ctdb_announce_vnn_iface(), a CTDB_SRVID_TAKE_IP message carrying the
  address string is sent to this node, and the control is answered with 0.
*/
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
/* a timed out event gets separate treatment */
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
/* the message payload is the address as a string, including its terminating NUL */
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
/*
  Take over a public address on this node.
  Assigns an interface through ctdb_vnn_assign_iface(), allocates the
  callback state on the vnn, takes ownership of the control request and
  starts an event script run whose completion is handled by
  ctdb_do_takeip_callback(). The interface name, the address and the netmask
  bits are passed as arguments to the event.
*/
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
/* NOTE(review): "assin" in the message below looks like a typo for "assign" */
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
/* keep the request alive so the callback can reply to it */
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
/* state handed as private_data to ctdb_do_updateip_callback() */
420 struct ctdb_do_updateip_state {
/* the control request that is answered from the callback */
421 struct ctdb_req_control *c;
/* interface the address was on before the move; restored when the move fails */
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
/*
  Completion handler for the event started by ctdb_do_updateip();
  private_data is the struct ctdb_do_updateip_state allocated there.
  On failure the new assignment is dropped, the vnn is pointed back at the
  old interface (taking a reference on it again) and status is sent back as
  the reply. Otherwise the address is announced through
  ctdb_announce_vnn_iface() and the control is answered with 0.
*/
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
/* point the vnn back at the previous interface and count the reference */
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
/*
  Move a public address to another interface on this node.
  Remembers the current interface, drops the assignment and asks
  ctdb_vnn_assign_iface() for a new one, then starts a CTDB_EVENT_UPDATE_IP
  event whose completion is handled by ctdb_do_updateip_callback().
*/
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
/* NOTE(review): old->name is used in a log message below; confirm old cannot be NULL here */
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 state = talloc(vnn, struct ctdb_do_updateip_state);
494 CTDB_NO_MEMORY(ctdb, state);
/* keep the request alive so the callback can reply to it */
496 state->c = talloc_steal(ctdb, c);
500 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501 "interface %s to %s\n",
502 ctdb_addr_to_str(&vnn->public_address),
503 vnn->public_netmask_bits,
505 ctdb_vnn_iface_string(vnn)));
507 ret = ctdb_event_script_callback(ctdb,
509 ctdb_do_updateip_callback,
512 CTDB_EVENT_UPDATE_IP,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
519 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520 ctdb_addr_to_str(&vnn->public_address),
521 old->name, ctdb_vnn_iface_string(vnn)));
530 Find the vnn entry whose public address matches the given address
531 returns NULL if the address is not known as a public address
/* Walks the ctdb->vnn list and compares each public_address with addr using ctdb_same_ip(). */
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 struct ctdb_vnn *vnn;
537 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538 if (ctdb_same_ip(&vnn->public_address, addr)) {
547 take over an ip address
/*
  Control handler asking this node to take over a public address; indata
  carries a struct ctdb_public_ip.
  A request whose pnn is not this node's pnn is logged as an error, as is an
  address that is not in the local vnn list. The handler then compares what
  the kernel reports (ctdb_sys_have_ip) with the daemon's own bookkeeping
  (vnn->iface, vnn->pnn) and with the best available interface, and ends up
  in one of three places: ctdb_do_takeip(), ctdb_do_updateip(), or the
  "Redundant takeover" log message when the address is already held.
  NOTE(review): presumably *async_reply is set when the reply is sent later
  from one of the callbacks - confirm.
*/
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550 struct ctdb_req_control *c,
555 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556 struct ctdb_vnn *vnn;
557 bool have_ip = false;
558 bool do_updateip = false;
559 bool do_takeip = false;
560 struct ctdb_iface *best_iface = NULL;
562 if (pip->pnn != ctdb->pnn) {
563 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564 "with pnn %d, but we're node %d\n",
565 ctdb_addr_to_str(&pip->addr),
566 pip->pnn, ctdb->pnn));
570 /* look up the vnn entry for this address */
571 vnn = find_public_ip_vnn(ctdb, &pip->addr);
573 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574 ctdb_addr_to_str(&pip->addr)));
578 have_ip = ctdb_sys_have_ip(&pip->addr);
579 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580 if (best_iface == NULL) {
/* NOTE(review): the two string pieces below join without a space ("finda usable") */
581 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582 "a usable interface (old %s, have_ip %d)\n",
583 ctdb_addr_to_str(&vnn->public_address),
584 vnn->public_netmask_bits,
585 ctdb_vnn_iface_string(vnn),
590 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
595 if (vnn->iface == NULL && have_ip) {
596 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597 "but we have no interface assigned, has someone manually configured it?"
599 ctdb_addr_to_str(&vnn->public_address)));
604 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
/* NOTE(review): "node %d" and "and we are" below join without a space */
605 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606 "and we have it on iface[%s], but it was assigned to node %d"
607 "and we are node %d, banning ourself\n",
608 ctdb_addr_to_str(&vnn->public_address),
609 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
614 if (vnn->pnn == -1 && have_ip) {
615 vnn->pnn = ctdb->pnn;
616 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
617 "and we already have it on iface[%s], update local daemon\n",
618 ctdb_addr_to_str(&vnn->public_address),
619 ctdb_vnn_iface_string(vnn)));
624 if (vnn->iface->link_up) {
625 /* only move when the rebalance gains something */
626 if (vnn->iface->references > (best_iface->references + 1)) {
629 } else if (vnn->iface != best_iface) {
636 ctdb_vnn_unassign_iface(ctdb, vnn);
643 ret = ctdb_do_takeip(ctdb, c, vnn);
647 } else if (do_updateip) {
648 ret = ctdb_do_updateip(ctdb, c, vnn);
654 * The interface is up and the kernel known the ip
657 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658 ctdb_addr_to_str(&pip->addr),
659 vnn->public_netmask_bits,
660 ctdb_vnn_iface_string(vnn)));
664 /* tell ctdb_control.c that we will be replying asynchronously */
671 takeover an ip address old v4 style
/*
  Adapter for the old IPv4 form of the takeover control.
  Allocates a zeroed struct ctdb_public_ip on the request, copies the bytes
  of indata over its start and forwards the result to
  ctdb_control_takeover_ip().
  NOTE(review): the copy length is indata.dsize while the destination holds
  sizeof(struct ctdb_public_ip) bytes - confirm the caller bounds
  indata.dsize before this function runs.
*/
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
674 struct ctdb_req_control *c,
680 data.dsize = sizeof(struct ctdb_public_ip);
681 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682 CTDB_NO_MEMORY(ctdb, data.dptr);
684 memcpy(data.dptr, indata.dptr, indata.dsize);
685 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
689 kill any clients that are registered with an IP that is being released
/*
  Walks ctdb->client_ip_list; for every entry whose address matches addr the
  client is looked up with ctdb_reqid_find() and, when its pid is non-zero,
  sent SIGKILL.
  NOTE(review): confirm the looked up client is checked for NULL before
  client->pid is read.
*/
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
693 struct ctdb_client_ip *ip;
695 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696 ctdb_addr_to_str(addr)));
698 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
/* local copy that is compared against addr below */
699 ctdb_sock_addr tmp_addr;
702 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
704 ctdb_addr_to_str(&ip->addr)));
706 if (ctdb_same_ip(&tmp_addr, addr)) {
707 struct ctdb_client *client = ctdb_reqid_find(ctdb,
710 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
712 ctdb_addr_to_str(&ip->addr),
715 if (client->pid != 0) {
716 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717 (unsigned)client->pid,
718 ctdb_addr_to_str(addr),
720 kill(client->pid, SIGKILL);
727 called when releaseip event finishes
/*
  Completion handler for the CTDB_EVENT_RELEASE_IP event started by
  ctdb_control_release_ip(); private_data is the struct
  takeover_callback_state allocated there.
  Sends a CTDB_SRVID_RELEASE_IP message carrying the address string to this
  node, kills clients registered with the address, drops the interface
  assignment of the vnn and answers the control with 0.
*/
729 static void release_ip_callback(struct ctdb_context *ctdb, int status,
732 struct takeover_callback_state *state =
733 talloc_get_type(private_data, struct takeover_callback_state);
/* a timed out event gets separate treatment */
736 if (status == -ETIME) {
740 /* send a message to all clients of this node telling them
741 that the cluster has been reconfigured and they should
742 release any sockets on this IP */
743 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745 data.dsize = strlen((char *)data.dptr)+1;
747 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
749 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
751 /* kill clients that have registered with this IP */
752 release_kill_clients(ctdb, state->addr);
754 ctdb_vnn_unassign_iface(ctdb, state->vnn);
756 /* the control succeeded */
757 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
762 release an ip address
/*
  Control handler asking this node to release a public address; indata
  carries a struct ctdb_public_ip.
  Pending announcements for the address are cancelled by freeing
  vnn->takeover_ctx. When the kernel does not hold the address the release is
  logged as redundant and only the interface assignment is dropped.
  Otherwise a CTDB_EVENT_RELEASE_IP event is started and the control is
  answered later from release_ip_callback().
*/
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
765 struct ctdb_req_control *c,
770 struct takeover_callback_state *state;
771 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772 struct ctdb_vnn *vnn;
774 /* look up the vnn entry for this address */
775 vnn = find_public_ip_vnn(ctdb, &pip->addr);
777 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778 ctdb_addr_to_str(&pip->addr)));
783 /* stop any previous arps */
784 talloc_free(vnn->takeover_ctx);
785 vnn->takeover_ctx = NULL;
787 if (!ctdb_sys_have_ip(&pip->addr)) {
788 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
789 ctdb_addr_to_str(&pip->addr),
790 vnn->public_netmask_bits,
791 ctdb_vnn_iface_string(vnn)));
792 ctdb_vnn_unassign_iface(ctdb, vnn);
796 if (vnn->iface == NULL) {
797 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798 "but we have no interface assigned, has someone manually configured it?"
800 ctdb_addr_to_str(&vnn->public_address)));
805 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn),
811 state = talloc(ctdb, struct takeover_callback_state);
812 CTDB_NO_MEMORY(ctdb, state);
/* the request and the address copy both become talloc children of state */
814 state->c = talloc_steal(state, c);
815 state->addr = talloc(state, ctdb_sock_addr);
816 CTDB_NO_MEMORY(ctdb, state->addr);
817 *state->addr = pip->addr;
820 ret = ctdb_event_script_callback(ctdb,
821 state, release_ip_callback, state,
823 CTDB_EVENT_RELEASE_IP,
825 ctdb_vnn_iface_string(vnn),
826 ctdb_addr_to_str(&pip->addr),
827 vnn->public_netmask_bits);
829 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830 ctdb_addr_to_str(&pip->addr),
831 ctdb_vnn_iface_string(vnn)));
836 /* tell the control that we will reply asynchronously */
842 release an ip address old v4 style
/*
  Adapter for the old IPv4 form of the release control.
  Allocates a zeroed struct ctdb_public_ip on the request, copies the bytes
  of indata over its start and forwards the result to
  ctdb_control_release_ip().
  NOTE(review): the copy length is indata.dsize while the destination holds
  sizeof(struct ctdb_public_ip) bytes - confirm the caller bounds
  indata.dsize before this function runs.
*/
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
845 struct ctdb_req_control *c,
851 data.dsize = sizeof(struct ctdb_public_ip);
852 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853 CTDB_NO_MEMORY(ctdb, data.dptr);
855 memcpy(data.dptr, indata.dptr, indata.dsize);
856 return ctdb_control_release_ip(ctdb, c, data, async_reply);
/*
  Add one public address to ctdb->vnn.
  ifaces is a comma separated list of interface names. The address is
  rejected with a critical log message when it is already in the list.
  Otherwise a zeroed ctdb_vnn is allocated on ctdb, the interface names are
  copied into the NULL terminated vnn->ifaces array, every name is registered
  through ctdb_add_local_iface() and the vnn is linked in with DLIST_ADD.
  When the kernel already holds the address, vnn->pnn is set to this node.
*/
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861 ctdb_sock_addr *addr,
862 unsigned mask, const char *ifaces)
864 struct ctdb_vnn *vnn;
871 /* Verify that we don't have an entry for this ip yet */
872 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
875 ctdb_addr_to_str(addr)));
880 /* create a new vnn structure for this ip address */
881 vnn = talloc_zero(ctdb, struct ctdb_vnn);
882 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
/* NOTE(review): the result of this talloc_array is not checked before tmp is
   allocated; the array is written at ifaces[num] after the loop even when
   the loop body never ran */
883 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
/* strtok modifies its input, so work on a private copy of the list */
884 tmp = talloc_strdup(vnn, ifaces);
885 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
/* room for the new name plus the terminating NULL */
887 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
894 vnn->ifaces[num] = NULL;
895 vnn->public_address = *addr;
896 vnn->public_netmask_bits = mask;
898 if (ctdb_sys_have_ip(addr)) {
899 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
900 vnn->pnn = ctdb->pnn;
903 for (i=0; vnn->ifaces[i]; i++) {
904 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
906 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
907 "for public_address[%s]\n",
908 vnn->ifaces[i], ctdb_addr_to_str(addr)));
913 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
917 DLIST_ADD(ctdb->vnn, vnn);
923 setup the event script directory
/* Stores a copy of script_dir, allocated on ctdb, in ctdb->event_script_dir. */
925 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
927 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
928 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
933 setup the public address lists from a file
/*
  Load the public address list from the file named by alist.
  Trailing empty lines are dropped. On each remaining line leading blanks and
  tabs are skipped, empty lines are ignored and the rest is split on blanks
  and tabs with strtok(). A line without an interface falls back to
  ctdb->default_public_interface; having neither is logged as critical.
  Each parsed address is handed to ctdb_add_public_address().
*/
935 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
941 lines = file_lines_load(alist, &nlines, ctdb);
943 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
/* ignore empty lines at the end of the file */
946 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
950 for (i=0;i<nlines;i++) {
958 while ((*line == ' ') || (*line == '\t')) {
964 if (strcmp(line, "") == 0) {
967 tok = strtok(line, " \t");
969 tok = strtok(NULL, " \t");
971 if (NULL == ctdb->default_public_interface) {
972 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
977 ifaces = ctdb->default_public_interface;
982 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
983 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
987 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
988 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
/*
  Set up ctdb->single_ip_vnn.
  Allocates a zeroed ctdb_vnn on ctdb with a one entry, NULL terminated
  interface list, parses ip into its public_address, registers the interface
  through ctdb_add_local_iface() and assigns it with ctdb_vnn_assign_iface().
*/
998 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1002 struct ctdb_vnn *svnn;
1006 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1007 CTDB_NO_MEMORY(ctdb, svnn);
/* two slots: the interface name and the terminating NULL */
1009 svnn->ifaces = talloc_array(svnn, const char *, 2);
1010 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1011 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1012 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1013 svnn->ifaces[1] = NULL;
1015 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1021 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1023 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1024 "for single_ip[%s]\n",
1026 ctdb_addr_to_str(&svnn->public_address)));
1031 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1037 ctdb->single_ip_vnn = svnn;
/* singly linked list entry describing one public address during a takeover run */
1041 struct ctdb_public_ip_list {
1042 struct ctdb_public_ip_list *next;
1044 ctdb_sock_addr addr;
1048 /* Given a physical node, walk the ips list and look at the entries whose
1049 pnn equals that node, i.e. the public addresses currently assigned to it.
1051 static int node_ip_coverage(struct ctdb_context *ctdb,
1053 struct ctdb_public_ip_list *ips)
1057 for (;ips;ips=ips->next) {
1058 if (ips->pnn == pnn) {
1066 /* Check if this ip is listed in the available_public_ips of the node,
1067 i.e. can that node take over this ip? Callers treat a non-zero result as "can not serve".
1069 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1070 struct ctdb_public_ip_list *ip)
1072 struct ctdb_all_public_ips *public_ips;
1075 public_ips = ctdb->nodes[pnn]->available_public_ips;
1077 if (public_ips == NULL) {
1081 for (i=0;i<public_ips->num;i++) {
1082 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1083 /* yes, this node can serve this public ip */
1092 /* Search the node list for a node to take over this ip.
1093 Pick the node that is currently serving the fewest ips
1094 so that the ips get spread out evenly.
1096 static int find_takeover_node(struct ctdb_context *ctdb,
1097 struct ctdb_node_map *nodemap, uint32_t mask,
1098 struct ctdb_public_ip_list *ip,
1099 struct ctdb_public_ip_list *all_ips)
1101 int pnn, min=0, num;
1105 for (i=0;i<nodemap->num;i++) {
1106 if (nodemap->nodes[i].flags & mask) {
1107 /* This node is not healthy and can not be used to serve
1113 /* verify that this node can serve this ip */
1114 if (can_node_serve_ip(ctdb, i, ip)) {
1115 /* no it couldn't so skip to the next node */
/* number of addresses currently assigned to this candidate */
1119 num = node_ip_coverage(ctdb, i, all_ips);
1120 /* was this the first node we checked ? */
1132 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1133 ctdb_addr_to_str(&ip->addr)));
/*
  Build the IP_KEYLEN word key used for the ip tree from a socket address.
  The key is built in a function-static array that is cleared at the start of
  every call, so each call overwrites the key produced by the previous one.
  An IPv4 address fills only key[3]; an IPv6 address fills all four words.
  Other address families are logged as an error.
*/
1143 static uint32_t *ip_key(ctdb_sock_addr *ip)
1145 static uint32_t key[IP_KEYLEN];
1147 bzero(key, sizeof(key));
1149 switch (ip->sa.sa_family) {
1151 key[3] = htonl(ip->ip.sin_addr.s_addr);
1154 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1155 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1156 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1157 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1160 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
/*
  Callback used with trbt_insertarray32_callback() when the ip tree is built.
  parm is the entry being inserted, data the entry already stored under the
  same key (NULL when there is none). An inserted entry without an owner
  (pnn == -1) inherits the pnn of the entry that was there before.
*/
1167 static void *add_ip_callback(void *parm, void *data)
1169 struct ctdb_public_ip_list *this_ip = parm;
1170 struct ctdb_public_ip_list *prev_ip = data;
1172 if (prev_ip == NULL) {
1175 if (this_ip->pnn == -1) {
1176 this_ip->pnn = prev_ip->pnn;
/*
  Traversal callback for the ip tree: param points at the head pointer of a
  list, data is one tree entry. The entry is linked in front of the current
  head.
  NOTE(review): presumably the head pointer is then updated to the new entry
  - confirm.
*/
1182 void getips_count_callback(void *param, void *data)
1184 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1185 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1187 new_ip->next = *ip_list;
/*
  Build the list of all public addresses known in the cluster.
  Any previous ctdb->ip_tree is freed and a new tree is created on ctdb. For
  every node that is not flagged NODE_FLAGS_DELETED and has known_public_ips,
  one entry per address is allocated on the tree and inserted under the key
  produced by ip_key(). The tree is then traversed with
  getips_count_callback() to chain the entries into a list.
  The entries are talloc children of ctdb->ip_tree.
*/
1191 static struct ctdb_public_ip_list *
1192 create_merged_ip_list(struct ctdb_context *ctdb)
1195 struct ctdb_public_ip_list *ip_list;
1196 struct ctdb_all_public_ips *public_ips;
1198 if (ctdb->ip_tree != NULL) {
1199 talloc_free(ctdb->ip_tree);
1200 ctdb->ip_tree = NULL;
1202 ctdb->ip_tree = trbt_create(ctdb, 0);
1204 for (i=0;i<ctdb->num_nodes;i++) {
1205 public_ips = ctdb->nodes[i]->known_public_ips;
1207 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1211 /* there were no public ips for this node */
1212 if (public_ips == NULL) {
1216 for (j=0;j<public_ips->num;j++) {
1217 struct ctdb_public_ip_list *tmp_ip;
1219 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1220 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1221 tmp_ip->pnn = public_ips->ips[j].pnn;
1222 tmp_ip->addr = public_ips->ips[j].addr;
1223 tmp_ip->next = NULL;
1225 trbt_insertarray32_callback(ctdb->ip_tree,
1226 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
/* NOTE(review): ip_list is declared without an initializer; confirm it is
   set to NULL before this traversal starts prepending to it */
1233 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1239 make any IP alias changes for public addresses that are necessary
1241 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1243 int i, num_healthy, retries;
1244 struct ctdb_public_ip ip;
1245 struct ctdb_public_ipv4 ipv4;
1246 uint32_t mask, *nodes;
1247 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1248 int maxnode, maxnum=0, minnode, minnum=0, num;
1250 struct timeval timeout;
1251 struct client_async_data *async_data;
1252 struct ctdb_client_control_state *state;
1253 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1256 * ip failover is completely disabled, just send out the
1257 * ipreallocated event.
1259 if (ctdb->tunable.disable_ip_failover != 0) {
1265 /* Count how many completely healthy nodes we have */
1267 for (i=0;i<nodemap->num;i++) {
1268 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1273 if (num_healthy > 0) {
1274 /* We have healthy nodes, so only consider them for
1275 serving public addresses
1277 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1279 /* We didnt have any completely healthy nodes so
1280 use "disabled" nodes as a fallback
1282 mask = NODE_FLAGS_INACTIVE;
1285 /* since nodes only know about those public addresses that
1286 can be served by that particular node, no single node has
1287 a full list of all public addresses that exist in the cluster.
1288 Walk over all node structures and create a merged list of
1289 all public addresses that exist in the cluster.
1291 keep the tree of ips around as ctdb->ip_tree
1293 all_ips = create_merged_ip_list(ctdb);
1295 /* If we want deterministic ip allocations, i.e. that the ip addresses
1296 will always be allocated the same way for a specific set of
1297 available/unavailable nodes.
1299 if (1 == ctdb->tunable.deterministic_public_ips) {
1300 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1301 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1302 tmp_ip->pnn = i%nodemap->num;
1307 /* mark all public addresses with a masked node as being served by
1310 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1311 if (tmp_ip->pnn == -1) {
1314 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1319 /* verify that the assigned nodes can serve that public ip
1320 and set it to -1 if not
1322 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1323 if (tmp_ip->pnn == -1) {
1326 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1327 /* this node can not serve this ip. */
1333 /* now we must redistribute all public addresses with takeover node
1334 -1 among the nodes available
1338 /* loop over all ip's and find a physical node to cover for
1341 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1342 if (tmp_ip->pnn == -1) {
1343 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1344 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1345 ctdb_addr_to_str(&tmp_ip->addr)));
1350 /* If we dont want ips to fail back after a node becomes healthy
1351 again, we wont even try to reallocat the ip addresses so that
1352 they are evenly spread out.
1353 This can NOT be used at the same time as DeterministicIPs !
1355 if (1 == ctdb->tunable.no_ip_failback) {
1356 if (1 == ctdb->tunable.deterministic_public_ips) {
1357 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1363 /* now, try to make sure the ip adresses are evenly distributed
1365 for each ip address, loop over all nodes that can serve this
1366 ip and make sure that the difference between the node
1367 serving the most and the node serving the least ip's are not greater
1370 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1371 if (tmp_ip->pnn == -1) {
1375 /* Get the highest and lowest number of ips's served by any
1376 valid node which can serve this ip.
1380 for (i=0;i<nodemap->num;i++) {
1381 if (nodemap->nodes[i].flags & mask) {
1385 /* only check nodes that can actually serve this ip */
1386 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1387 /* no it couldnt so skip to the next node */
1391 num = node_ip_coverage(ctdb, i, all_ips);
1392 if (maxnode == -1) {
1401 if (minnode == -1) {
1411 if (maxnode == -1) {
1412 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1413 ctdb_addr_to_str(&tmp_ip->addr)));
1418 /* If we want deterministic IPs then dont try to reallocate
1419 them to spread out the load.
1421 if (1 == ctdb->tunable.deterministic_public_ips) {
1425 /* if the spread between the smallest and largest coverage by
1426 a node is >=2 we steal one of the ips from the node with
1427 most coverage to even things out a bit.
1428 try to do this at most 5 times since we dont want to spend
1429 too much time balancing the ip coverage.
1431 if ( (maxnum > minnum+1)
1433 struct ctdb_public_ip_list *tmp;
1435 /* mark one of maxnode's vnn's as unassigned and try
1438 for (tmp=all_ips;tmp;tmp=tmp->next) {
1439 if (tmp->pnn == maxnode) {
1449 /* finished distributing the public addresses, now just send the
1450 info out to the nodes
1454 /* at this point ->pnn is the node which will own each IP
1455 or -1 if there is no node that can cover this ip
1458 /* now tell all nodes to delete any alias that they should not
1459 have. This will be a NOOP on nodes that don't currently
1460 hold the given alias */
1461 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1462 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1464 for (i=0;i<nodemap->num;i++) {
1465 /* don't talk to unconnected nodes, but do talk to banned nodes */
1466 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1470 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1471 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1472 /* This node should be serving this
1473 vnn so dont tell it to release the ip
1477 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1478 ipv4.pnn = tmp_ip->pnn;
1479 ipv4.sin = tmp_ip->addr.ip;
1481 timeout = TAKEOVER_TIMEOUT();
1482 data.dsize = sizeof(ipv4);
1483 data.dptr = (uint8_t *)&ipv4;
1484 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1485 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1489 ip.pnn = tmp_ip->pnn;
1490 ip.addr = tmp_ip->addr;
1492 timeout = TAKEOVER_TIMEOUT();
1493 data.dsize = sizeof(ip);
1494 data.dptr = (uint8_t *)&ip;
1495 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1496 0, CTDB_CONTROL_RELEASE_IP, 0,
1501 if (state == NULL) {
1502 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1503 talloc_free(tmp_ctx);
1507 ctdb_client_async_add(async_data, state);
1510 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1511 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1512 talloc_free(tmp_ctx);
1515 talloc_free(async_data);
1518 /* tell all nodes to get their own IPs */
1519 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1520 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1521 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1522 if (tmp_ip->pnn == -1) {
1523 /* this IP won't be taken over */
1527 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1528 ipv4.pnn = tmp_ip->pnn;
1529 ipv4.sin = tmp_ip->addr.ip;
1531 timeout = TAKEOVER_TIMEOUT();
1532 data.dsize = sizeof(ipv4);
1533 data.dptr = (uint8_t *)&ipv4;
1534 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1535 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1539 ip.pnn = tmp_ip->pnn;
1540 ip.addr = tmp_ip->addr;
1542 timeout = TAKEOVER_TIMEOUT();
1543 data.dsize = sizeof(ip);
1544 data.dptr = (uint8_t *)&ip;
1545 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1546 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1550 if (state == NULL) {
1551 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1552 talloc_free(tmp_ctx);
1556 ctdb_client_async_add(async_data, state);
1558 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1559 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1560 talloc_free(tmp_ctx);
1565 /* tell all nodes to update natgw */
1566 /* send the flags update natgw on all connected nodes */
1567 data.dptr = discard_const("ipreallocated");
1568 data.dsize = strlen((char *)data.dptr) + 1;
1569 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1570 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1571 nodes, 0, TAKEOVER_TIMEOUT(),
1575 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1578 talloc_free(tmp_ctx);
1584 destroy a ctdb_client_ip structure
1586 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1588 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1589 ctdb_addr_to_str(&ip->addr),
1590 ntohs(ip->addr.ip.sin_port),
1593 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1598 called by a client to inform us of a TCP connection that it is managing
1599 that should be tickled with an ACK when IP takeover is done
1600 we handle both the old ipv4 style of packets as well as the new ipv4/6
1603 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1606 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1607 struct ctdb_control_tcp *old_addr = NULL;
1608 struct ctdb_control_tcp_addr new_addr;
1609 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1610 struct ctdb_tcp_list *tcp;
1611 struct ctdb_tcp_connection t;
1614 struct ctdb_client_ip *ip;
1615 struct ctdb_vnn *vnn;
1616 ctdb_sock_addr addr;
1618 switch (indata.dsize) {
1619 case sizeof(struct ctdb_control_tcp):
1620 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1621 ZERO_STRUCT(new_addr);
1622 tcp_sock = &new_addr;
1623 tcp_sock->src.ip = old_addr->src;
1624 tcp_sock->dest.ip = old_addr->dest;
1626 case sizeof(struct ctdb_control_tcp_addr):
1627 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1630 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1631 "to ctdb_control_tcp_client. size was %d but "
1632 "only allowed sizes are %lu and %lu\n",
1634 (long unsigned)sizeof(struct ctdb_control_tcp),
1635 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1639 addr = tcp_sock->src;
1640 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1641 addr = tcp_sock->dest;
1642 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1645 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1646 vnn = find_public_ip_vnn(ctdb, &addr);
1648 switch (addr.sa.sa_family) {
1650 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1651 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1652 ctdb_addr_to_str(&addr)));
1656 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1657 ctdb_addr_to_str(&addr)));
1660 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1666 if (vnn->pnn != ctdb->pnn) {
1667 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1668 ctdb_addr_to_str(&addr),
1669 client_id, client->pid));
1670 /* failing this call will tell smbd to die */
1674 ip = talloc(client, struct ctdb_client_ip);
1675 CTDB_NO_MEMORY(ctdb, ip);
1679 ip->client_id = client_id;
1680 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1681 DLIST_ADD(ctdb->client_ip_list, ip);
1683 tcp = talloc(client, struct ctdb_tcp_list);
1684 CTDB_NO_MEMORY(ctdb, tcp);
1686 tcp->connection.src_addr = tcp_sock->src;
1687 tcp->connection.dst_addr = tcp_sock->dest;
1689 DLIST_ADD(client->tcp_list, tcp);
1691 t.src_addr = tcp_sock->src;
1692 t.dst_addr = tcp_sock->dest;
1694 data.dptr = (uint8_t *)&t;
1695 data.dsize = sizeof(t);
1697 switch (addr.sa.sa_family) {
1699 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1700 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1701 ctdb_addr_to_str(&tcp_sock->src),
1702 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1705 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1706 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1707 ctdb_addr_to_str(&tcp_sock->src),
1708 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1711 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1715 /* tell all nodes about this tcp connection */
1716 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1717 CTDB_CONTROL_TCP_ADD,
1718 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1720 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1728 find a tcp address on a list
1730 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1731 struct ctdb_tcp_connection *tcp)
1735 if (array == NULL) {
1739 for (i=0;i<array->num;i++) {
1740 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1741 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1742 return &array->connections[i];
1751 called by a daemon to inform us of a TCP connection that one of its
1752 clients is managing and that should be tickled with an ACK when IP takeover is
1755 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1757 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1758 struct ctdb_tcp_array *tcparray;
1759 struct ctdb_tcp_connection tcp;
1760 struct ctdb_vnn *vnn;
1762 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1764 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1765 ctdb_addr_to_str(&p->dst_addr)));
1771 tcparray = vnn->tcp_array;
1773 /* If this is the first tickle */
1774 if (tcparray == NULL) {
1775 tcparray = talloc_size(ctdb->nodes,
1776 offsetof(struct ctdb_tcp_array, connections) +
1777 sizeof(struct ctdb_tcp_connection) * 1);
1778 CTDB_NO_MEMORY(ctdb, tcparray);
1779 vnn->tcp_array = tcparray;
1782 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1783 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1785 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1786 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1789 if (tcp_update_needed) {
1790 vnn->tcp_update_needed = true;
1796 /* Do we already have this tickle ?*/
1797 tcp.src_addr = p->src_addr;
1798 tcp.dst_addr = p->dst_addr;
1799 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1800 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1801 ctdb_addr_to_str(&tcp.dst_addr),
1802 ntohs(tcp.dst_addr.ip.sin_port),
1807 /* A new tickle, we must add it to the array */
1808 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1809 struct ctdb_tcp_connection,
1811 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1813 vnn->tcp_array = tcparray;
1814 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1815 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1818 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1819 ctdb_addr_to_str(&tcp.dst_addr),
1820 ntohs(tcp.dst_addr.ip.sin_port),
1823 if (tcp_update_needed) {
1824 vnn->tcp_update_needed = true;
1832 called by a daemon to inform us of a TCP connection that one of its
1833 clients is managing and that should be tickled with an ACK when IP takeover is
1836 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1838 struct ctdb_tcp_connection *tcpp;
1839 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1842 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1843 ctdb_addr_to_str(&conn->dst_addr)));
1847 /* if the array is empty we can't remove it
1848 and we don't need to do anything
1850 if (vnn->tcp_array == NULL) {
1851 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1852 ctdb_addr_to_str(&conn->dst_addr),
1853 ntohs(conn->dst_addr.ip.sin_port)));
1858 /* See if we know this connection
1859 if we don't know this connection then we don't need to do anything
1861 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1863 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1864 ctdb_addr_to_str(&conn->dst_addr),
1865 ntohs(conn->dst_addr.ip.sin_port)));
1870 /* We need to remove this entry from the array.
1871 Instead of allocating a new array and copying data to it
1872 we cheat and just copy the last entry in the existing array
1873 to the entry that is to be removed and just shrink the
1876 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1877 vnn->tcp_array->num--;
1879 /* If we deleted the last entry we also need to remove the entire array
1881 if (vnn->tcp_array->num == 0) {
1882 talloc_free(vnn->tcp_array);
1883 vnn->tcp_array = NULL;
1886 vnn->tcp_update_needed = true;
1888 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1889 ctdb_addr_to_str(&conn->src_addr),
1890 ntohs(conn->src_addr.ip.sin_port)));
1895 called by a daemon to inform us of a TCP connection that one of its
1896 clients used is no longer needed in the tickle database
1898 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1900 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1902 ctdb_remove_tcp_connection(ctdb, conn);
1909 called when a daemon restarts - send all tickles for all public addresses
1910 we are serving immediately to the new node.
1912 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1914 /*XXX here we should send all tickles we are serving to the new node */
1920 called when a client structure goes away - hook to remove
1921 elements from the tcp_list in all daemons
1923 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1925 while (client->tcp_list) {
1926 struct ctdb_tcp_list *tcp = client->tcp_list;
1927 DLIST_REMOVE(client->tcp_list, tcp);
1928 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1934 release all IPs on shutdown
1936 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1938 struct ctdb_vnn *vnn;
1940 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1941 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1942 ctdb_vnn_unassign_iface(ctdb, vnn);
1948 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1949 ctdb_vnn_iface_string(vnn),
1950 ctdb_addr_to_str(&vnn->public_address),
1951 vnn->public_netmask_bits);
1952 release_kill_clients(ctdb, &vnn->public_address);
1953 ctdb_vnn_unassign_iface(ctdb, vnn);
1959 get list of public IPs
1961 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1962 struct ctdb_req_control *c, TDB_DATA *outdata)
1965 struct ctdb_all_public_ips *ips;
1966 struct ctdb_vnn *vnn;
1967 bool only_available = false;
1969 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1970 only_available = true;
1973 /* count how many public ip structures we have */
1975 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1979 len = offsetof(struct ctdb_all_public_ips, ips) +
1980 num*sizeof(struct ctdb_public_ip);
1981 ips = talloc_zero_size(outdata, len);
1982 CTDB_NO_MEMORY(ctdb, ips);
1985 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1986 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1989 ips->ips[i].pnn = vnn->pnn;
1990 ips->ips[i].addr = vnn->public_address;
1994 len = offsetof(struct ctdb_all_public_ips, ips) +
1995 i*sizeof(struct ctdb_public_ip);
1997 outdata->dsize = len;
1998 outdata->dptr = (uint8_t *)ips;
2005 get list of public IPs, old ipv4 style. only returns ipv4 addresses
2007 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2008 struct ctdb_req_control *c, TDB_DATA *outdata)
2011 struct ctdb_all_public_ipsv4 *ips;
2012 struct ctdb_vnn *vnn;
2014 /* count how many public ip structures we have */
2016 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2017 if (vnn->public_address.sa.sa_family != AF_INET) {
2023 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2024 num*sizeof(struct ctdb_public_ipv4);
2025 ips = talloc_zero_size(outdata, len);
2026 CTDB_NO_MEMORY(ctdb, ips);
2028 outdata->dsize = len;
2029 outdata->dptr = (uint8_t *)ips;
2033 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2034 if (vnn->public_address.sa.sa_family != AF_INET) {
2037 ips->ips[i].pnn = vnn->pnn;
2038 ips->ips[i].sin = vnn->public_address.ip;
2045 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2046 struct ctdb_req_control *c,
2051 ctdb_sock_addr *addr;
2052 struct ctdb_control_public_ip_info *info;
2053 struct ctdb_vnn *vnn;
2055 addr = (ctdb_sock_addr *)indata.dptr;
2057 vnn = find_public_ip_vnn(ctdb, addr);
2059 /* if it is not a public ip it could be our 'single ip' */
2060 if (ctdb->single_ip_vnn) {
2061 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2062 vnn = ctdb->single_ip_vnn;
2067 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2068 "'%s'not a public address\n",
2069 ctdb_addr_to_str(addr)));
2073 /* count how many interfaces this public ip has */
2075 for (;vnn->ifaces[num];) {
2079 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2080 num*sizeof(struct ctdb_control_iface_info);
2081 info = talloc_zero_size(outdata, len);
2082 CTDB_NO_MEMORY(ctdb, info);
2084 info->ip.addr = vnn->public_address;
2085 info->ip.pnn = vnn->pnn;
2086 info->active_idx = 0xFFFFFFFF;
2088 for (i=0; vnn->ifaces[i]; i++) {
2089 struct ctdb_iface *cur;
2091 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2093 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2097 if (vnn->iface == cur) {
2098 info->active_idx = i;
2100 strcpy(info->ifaces[i].name, cur->name);
2101 info->ifaces[i].link_state = cur->link_up;
2102 info->ifaces[i].references = cur->references;
2105 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2106 i*sizeof(struct ctdb_control_iface_info);
2108 outdata->dsize = len;
2109 outdata->dptr = (uint8_t *)info;
2114 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2115 struct ctdb_req_control *c,
2119 struct ctdb_control_get_ifaces *ifaces;
2120 struct ctdb_iface *cur;
2122 /* count how many interfaces we have */
2124 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2128 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2129 num*sizeof(struct ctdb_control_iface_info);
2130 ifaces = talloc_zero_size(outdata, len);
2131 CTDB_NO_MEMORY(ctdb, ifaces);
2134 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2135 strcpy(ifaces->ifaces[i].name, cur->name);
2136 ifaces->ifaces[i].link_state = cur->link_up;
2137 ifaces->ifaces[i].references = cur->references;
2141 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2142 i*sizeof(struct ctdb_control_iface_info);
2144 outdata->dsize = len;
2145 outdata->dptr = (uint8_t *)ifaces;
2150 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2151 struct ctdb_req_control *c,
2154 struct ctdb_control_iface_info *info;
2155 struct ctdb_iface *iface;
2156 bool link_up = false;
2158 info = (struct ctdb_control_iface_info *)indata.dptr;
2160 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2161 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2162 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2163 len, len, info->name));
2167 switch (info->link_state) {
2175 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2176 (unsigned int)info->link_state));
2180 if (info->references != 0) {
2181 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2182 (unsigned int)info->references));
2186 iface = ctdb_find_iface(ctdb, info->name);
2187 if (iface == NULL) {
2188 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2193 if (link_up == iface->link_up) {
2197 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2198 ("iface[%s] has changed it's link status %s => %s\n",
2200 iface->link_up?"up":"down",
2201 link_up?"up":"down"));
2203 iface->link_up = link_up;
2209 structure containing the listening socket and the list of tcp connections
2210 that the ctdb daemon is to kill
2212 struct ctdb_kill_tcp {
2213 struct ctdb_vnn *vnn;
2214 struct ctdb_context *ctdb;
2216 struct fd_event *fde;
2217 trbt_tree_t *connections;
2222 a tcp connection that is to be killed
2224 struct ctdb_killtcp_con {
2225 ctdb_sock_addr src_addr;
2226 ctdb_sock_addr dst_addr;
2228 struct ctdb_kill_tcp *killtcp;
2231 /* this function is used to create a key to represent this socketpair
2232 in the killtcp tree.
2233 this key is used to insert and lookup matching socketpairs that are
2234 to be tickled and RST
2236 #define KILLTCP_KEYLEN 10
2237 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2239 static uint32_t key[KILLTCP_KEYLEN];
2241 bzero(key, sizeof(key));
2243 if (src->sa.sa_family != dst->sa.sa_family) {
2244 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2248 switch (src->sa.sa_family) {
2250 key[0] = dst->ip.sin_addr.s_addr;
2251 key[1] = src->ip.sin_addr.s_addr;
2252 key[2] = dst->ip.sin_port;
2253 key[3] = src->ip.sin_port;
2256 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2257 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2258 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2259 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2260 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2261 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2262 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2263 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2264 key[8] = dst->ip6.sin6_port;
2265 key[9] = src->ip6.sin6_port;
2268 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2276 called when we get a read event on the raw socket
2278 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2279 uint16_t flags, void *private_data)
2281 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2282 struct ctdb_killtcp_con *con;
2283 ctdb_sock_addr src, dst;
2284 uint32_t ack_seq, seq;
2286 if (!(flags & EVENT_FD_READ)) {
2290 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2291 killtcp->private_data,
2293 &ack_seq, &seq) != 0) {
2294 /* probably a non-tcp ACK packet */
2298 /* check if we have this guy in our list of connections
2301 con = trbt_lookuparray32(killtcp->connections,
2302 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2304 /* no this was some other packet we can just ignore */
2308 /* This one has been tickled !
2309 now reset him and remove him from the list.
2311 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2312 ntohs(con->dst_addr.ip.sin_port),
2313 ctdb_addr_to_str(&con->src_addr),
2314 ntohs(con->src_addr.ip.sin_port)));
2316 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2321 /* when traversing the list of all tcp connections to send tickle acks to
2322 (so that we can capture the ack coming back and kill the connection
2324 this callback is called for each connection we are currently trying to kill
2326 static void tickle_connection_traverse(void *param, void *data)
2328 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2330 /* have tried too many times, just give up */
2331 if (con->count >= 5) {
2332 /* can't delete in traverse: reparent to delete_cons */
2333 talloc_steal(param, con);
2337 /* otherwise, try tickling it again */
2340 (ctdb_sock_addr *)&con->dst_addr,
2341 (ctdb_sock_addr *)&con->src_addr,
2347 called every second until all sentenced connections have been reset
2349 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2350 struct timeval t, void *private_data)
2352 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2353 void *delete_cons = talloc_new(NULL);
2355 /* loop over all connections sending tickle ACKs */
2356 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2358 /* now we've finished traverse, it's safe to do deletion. */
2359 talloc_free(delete_cons);
2361 /* If there are no more connections to kill we can remove the
2362 entire killtcp structure
2364 if ( (killtcp->connections == NULL) ||
2365 (killtcp->connections->root == NULL) ) {
2366 talloc_free(killtcp);
2370 /* try tickling them again in a second's time
2372 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2373 ctdb_tickle_sentenced_connections, killtcp);
2377 destroy the killtcp structure
2379 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2382 killtcp->vnn->killtcp = NULL;
2388 /* nothing fancy here, just unconditionally replace any existing
2389 connection structure with the new one.
2391 don't even free the old one if it did exist, that one is talloc_stolen
2392 by the same node in the tree anyway and will be deleted when the new data
2395 static void *add_killtcp_callback(void *parm, void *data)
2401 add a tcp socket to the list of connections we want to RST
2403 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2407 ctdb_sock_addr src, dst;
2408 struct ctdb_kill_tcp *killtcp;
2409 struct ctdb_killtcp_con *con;
2410 struct ctdb_vnn *vnn;
2412 ctdb_canonicalize_ip(s, &src);
2413 ctdb_canonicalize_ip(d, &dst);
2415 vnn = find_public_ip_vnn(ctdb, &dst);
2417 vnn = find_public_ip_vnn(ctdb, &src);
2420 /* if it is not a public ip it could be our 'single ip' */
2421 if (ctdb->single_ip_vnn) {
2422 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2423 vnn = ctdb->single_ip_vnn;
2428 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2432 killtcp = vnn->killtcp;
2434 /* If this is the first connection to kill we must allocate
2437 if (killtcp == NULL) {
2438 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2439 CTDB_NO_MEMORY(ctdb, killtcp);
2442 killtcp->ctdb = ctdb;
2443 killtcp->capture_fd = -1;
2444 killtcp->connections = trbt_create(killtcp, 0);
2446 vnn->killtcp = killtcp;
2447 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2452 /* create a structure that describes this connection we want to
2453 RST and store it in killtcp->connections
2455 con = talloc(killtcp, struct ctdb_killtcp_con);
2456 CTDB_NO_MEMORY(ctdb, con);
2457 con->src_addr = src;
2458 con->dst_addr = dst;
2460 con->killtcp = killtcp;
2463 trbt_insertarray32_callback(killtcp->connections,
2464 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2465 add_killtcp_callback, con);
2468 If we don't have a socket to listen on yet we must create it
2470 if (killtcp->capture_fd == -1) {
2471 const char *iface = ctdb_vnn_iface_string(vnn);
2472 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2473 if (killtcp->capture_fd == -1) {
2474 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2475 "socket on iface '%s' for killtcp (%s)\n",
2476 iface, strerror(errno)));
2482 if (killtcp->fde == NULL) {
2483 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2485 capture_tcp_handler, killtcp);
2486 tevent_fd_set_auto_close(killtcp->fde);
2488 /* We also need to set up some events to tickle all these connections
2489 until they are all reset
2491 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2492 ctdb_tickle_sentenced_connections, killtcp);
2495 /* tickle him once now */
2504 talloc_free(vnn->killtcp);
2505 vnn->killtcp = NULL;
2510 kill a TCP connection.
2512 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2514 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2516 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2520 called by a daemon to inform us of the entire list of TCP tickles for
2521 a particular public address.
2522 this control should only be sent by the node that is currently serving
2523 that public address.
2525 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2527 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2528 struct ctdb_tcp_array *tcparray;
2529 struct ctdb_vnn *vnn;
2531 /* We must at least have tickles.num or else we can't verify the size
2532 of the received data blob
2534 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2535 tickles.connections)) {
2536 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2540 /* verify that the size of data matches what we expect */
2541 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2542 tickles.connections)
2543 + sizeof(struct ctdb_tcp_connection)
2544 * list->tickles.num) {
2545 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2549 vnn = find_public_ip_vnn(ctdb, &list->addr);
2551 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2552 ctdb_addr_to_str(&list->addr)));
2557 /* remove any old ticklelist we might have */
2558 talloc_free(vnn->tcp_array);
2559 vnn->tcp_array = NULL;
2561 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2562 CTDB_NO_MEMORY(ctdb, tcparray);
2564 tcparray->num = list->tickles.num;
2566 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2567 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2569 memcpy(tcparray->connections, &list->tickles.connections[0],
2570 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2572 /* We now have a new fresh tickle list array for this vnn */
2573 vnn->tcp_array = talloc_steal(vnn, tcparray);
2579 called to return the full list of tickles for the public address associated
2580 with the provided vnn
2582 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2584 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2585 struct ctdb_control_tcp_tickle_list *list;
2586 struct ctdb_tcp_array *tcparray;
2588 struct ctdb_vnn *vnn;
2590 vnn = find_public_ip_vnn(ctdb, addr);
2592 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2593 ctdb_addr_to_str(addr)));
2598 tcparray = vnn->tcp_array;
2600 num = tcparray->num;
2605 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2606 tickles.connections)
2607 + sizeof(struct ctdb_tcp_connection) * num;
2609 outdata->dptr = talloc_size(outdata, outdata->dsize);
2610 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2611 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2614 list->tickles.num = num;
2616 memcpy(&list->tickles.connections[0], tcparray->connections,
2617 sizeof(struct ctdb_tcp_connection) * num);
2625 set the list of all tcp tickles for a public address
2627 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2628 struct timeval timeout, uint32_t destnode,
2629 ctdb_sock_addr *addr,
2630 struct ctdb_tcp_array *tcparray)
2634 struct ctdb_control_tcp_tickle_list *list;
2637 num = tcparray->num;
2642 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2643 tickles.connections) +
2644 sizeof(struct ctdb_tcp_connection) * num;
2645 data.dptr = talloc_size(ctdb, data.dsize);
2646 CTDB_NO_MEMORY(ctdb, data.dptr);
2648 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2650 list->tickles.num = num;
2652 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2655 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2656 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2657 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2659 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2663 talloc_free(data.dptr);
2670 perform tickle updates if required
2672 static void ctdb_update_tcp_tickles(struct event_context *ev,
2673 struct timed_event *te,
2674 struct timeval t, void *private_data)
2676 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2678 struct ctdb_vnn *vnn;
2680 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2681 /* we only send out updates for public addresses that
2684 if (ctdb->pnn != vnn->pnn) {
2687 /* We only send out the updates if we need to */
2688 if (!vnn->tcp_update_needed) {
2691 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2693 CTDB_BROADCAST_CONNECTED,
2694 &vnn->public_address,
2697 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2698 ctdb_addr_to_str(&vnn->public_address)));
2702 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2703 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2704 ctdb_update_tcp_tickles, ctdb);
2709 start periodic update of tcp tickles
2711 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2713 ctdb->tickle_update_context = talloc_new(ctdb);
2715 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2716 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2717 ctdb_update_tcp_tickles, ctdb);
2723 struct control_gratious_arp {
2724 struct ctdb_context *ctdb;
2725 ctdb_sock_addr addr;
2731 send a control_gratuitous arp
2733 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2734 struct timeval t, void *private_data)
2737 struct control_gratious_arp *arp = talloc_get_type(private_data,
2738 struct control_gratious_arp);
2740 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2742 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2743 arp->iface, strerror(errno)));
2748 if (arp->count == CTDB_ARP_REPEAT) {
2753 event_add_timed(arp->ctdb->ev, arp,
2754 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2755 send_gratious_arp, arp);
2762 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2764 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2765 struct control_gratious_arp *arp;
2767 /* verify the size of indata */
2768 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2769 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2770 (unsigned)indata.dsize,
2771 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2775 ( offsetof(struct ctdb_control_gratious_arp, iface)
2776 + gratious_arp->len ) ){
2778 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2779 "but should be %u bytes\n",
2780 (unsigned)indata.dsize,
2781 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2786 arp = talloc(ctdb, struct control_gratious_arp);
2787 CTDB_NO_MEMORY(ctdb, arp);
2790 arp->addr = gratious_arp->addr;
2791 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2792 CTDB_NO_MEMORY(ctdb, arp->iface);
2795 event_add_timed(arp->ctdb->ev, arp,
2796 timeval_zero(), send_gratious_arp, arp);
/*
  Control handler that adds a public address.
  indata.dptr is interpreted as a struct ctdb_control_ip_iface.
  After the size checks the address, mask and interface name are passed
  to ctdb_add_public_address().
  The return statements are not visible in this part of the file.
 */
2801 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2803 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2806 /* verify the size of indata */
/* first check: the fixed part of the structure, up to the iface member,
   must fit in indata */
2807 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2808 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/* second check (most of the condition is not visible here); the message
   below gives the expected size as the fixed part plus pub->len */
2812 ( offsetof(struct ctdb_control_ip_iface, iface)
2815 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2816 "but should be %u bytes\n",
2817 (unsigned)indata.dsize,
2818 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
/* hand the validated request to ctdb_add_public_address() */
2822 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
/* failure report; the condition guarding it is not visible here */
2825 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2833 called when releaseip event finishes for del_public_address
/*
  Callback passed to ctdb_event_script_callback() by
  ctdb_control_del_public_address().
  It frees private_data, which that caller sets to the talloc context it
  created for the event. ctdb and status are not used in the lines
  visible here.
 */
2835 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2838 talloc_free(private_data);
/*
  Control handler that removes a public address.
  indata.dptr is interpreted as a struct ctdb_control_ip_iface.
  After the size checks the ctdb->vnn list is searched for an entry whose
  public_address matches pub->addr. A match is removed from the list, a
  CTDB_EVENT_RELEASE_IP event script is started for it and its interface
  is unassigned.
  The return statements are not visible in this part of the file.
 */
2841 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2843 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2844 struct ctdb_vnn *vnn;
2847 /* verify the size of indata */
/* first check: the fixed part of the structure, up to the iface member,
   must fit in indata */
2848 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2849 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
/* second check (most of the condition is not visible here); the message
   below gives the expected size as the fixed part plus pub->len */
2853 ( offsetof(struct ctdb_control_ip_iface, iface)
2856 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2857 "but should be %u bytes\n",
2858 (unsigned)indata.dsize,
2859 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2863 /* walk over all public addresses until we find a match */
2864 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2865 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2866 TALLOC_CTX *mem_ctx;
/* unlink the entry from the list of public addresses */
2868 DLIST_REMOVE(ctdb->vnn, vnn);
/* special case for an entry with no interface assigned; the body of
   this branch is not visible here */
2869 if (vnn->iface == NULL) {
/* mem_ctx is passed to the event script call both as its second
   argument and as the private data that delete_ip_callback() frees */
2874 mem_ctx = talloc_new(ctdb);
2875 ret = ctdb_event_script_callback(ctdb,
2876 mem_ctx, delete_ip_callback, mem_ctx,
2878 CTDB_EVENT_RELEASE_IP,
2880 ctdb_vnn_iface_string(vnn),
2881 ctdb_addr_to_str(&vnn->public_address),
2882 vnn->public_netmask_bits);
/* the interface string was evaluated for the call above, before the
   interface is detached from the vnn here */
2883 ctdb_vnn_unassign_iface(ctdb, vnn);
2895 /* This function is called from the recovery daemon to verify that a remote
2896 node has the expected ip allocation.
2897 This is verified against ctdb->ip_tree
/*
  Compares the ip list reported in ips with ctdb->ip_tree.
  A NULL ctdb->ip_tree is handled first as a special case.
  Otherwise each of the ips->num entries is looked up in the tree with
  trbt_lookuparray32(), using IP_KEYLEN and ip_key() of the entry address.
  Per entry, in order:
    - no tree record for the address is logged as an error
    - a pnn of -1 on either side is tested next; the action taken is not
      visible in this part of the file
    - differing pnn values are logged as an inconsistent allocation
  The return statements are not visible in this part of the file.
  NOTE(review): pnn is compared with -1 but printed with %u - confirm the
  declared type of the pnn members.
 */
2899 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2901 struct ctdb_public_ip_list *tmp_ip;
2904 if (ctdb->ip_tree == NULL) {
2905 /* dont know the expected allocation yet, assume remote node
2914 for (i=0; i<ips->num; i++) {
2915 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2916 if (tmp_ip == NULL) {
2917 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2921 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2925 if (tmp_ip->pnn != ips->ips[i].pnn) {
2926 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
  Records a new owner node for one public ip in ctdb->ip_tree.
  The record for ip->addr is looked up with trbt_lookuparray32() and its
  pnn is overwritten with ip->pnn.
  A missing tree and a missing record are each logged as an error.
  The return statements are not visible in this part of the file.
 */
2934 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2936 struct ctdb_public_ip_list *tmp_ip;
/* nothing can be updated before the tree exists */
2938 if (ctdb->ip_tree == NULL) {
2939 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
/* look the address up with the same key derivation as
   verify_remote_ip_allocation() */
2943 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2944 if (tmp_ip == NULL) {
2945 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* log the old and the new owner before overwriting the old one */
2949 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2950 tmp_ip->pnn = ip->pnn;