4 Copyright (C) Ronnie Sahlberg 2007
5 Copyright (C) Andrew Tridgell 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT 3
37 struct ctdb_iface *prev, *next;
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
46 return vnn->iface->name;
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
56 /* Verify that we don't have an entry for this interface yet */
57 for (i=ctdb->ifaces;i;i=i->next) {
58 if (strcmp(i->name, iface) == 0) {
63 /* create a new structure for this interface */
64 i = talloc_zero(ctdb, struct ctdb_iface);
65 CTDB_NO_MEMORY_FATAL(ctdb, i);
66 i->name = talloc_strdup(i, iface);
67 CTDB_NO_MEMORY(ctdb, i->name);
70 DLIST_ADD(ctdb->ifaces, i);
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
80 /* Search the interface list for an entry with this name */
81 for (i=ctdb->ifaces;i;i=i->next) {
82 if (strcmp(i->name, iface) == 0) {
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
94 struct ctdb_iface *cur = NULL;
95 struct ctdb_iface *best = NULL;
97 for (i=0; vnn->ifaces[i]; i++) {
99 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
113 if (cur->references < best->references) {
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123 struct ctdb_vnn *vnn)
125 struct ctdb_iface *best = NULL;
128 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129 "still assigned to iface '%s'\n",
130 ctdb_addr_to_str(&vnn->public_address),
131 ctdb_vnn_iface_string(vnn)));
135 best = ctdb_vnn_best_iface(ctdb, vnn);
137 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138 "cannot assign to iface any iface\n",
139 ctdb_addr_to_str(&vnn->public_address)));
145 vnn->pnn = ctdb->pnn;
147 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148 "now assigned to iface '%s' refs[%d]\n",
149 ctdb_addr_to_str(&vnn->public_address),
150 ctdb_vnn_iface_string(vnn),
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156 struct ctdb_vnn *vnn)
158 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159 "now unassigned (old iface '%s' refs[%d])\n",
160 ctdb_addr_to_str(&vnn->public_address),
161 ctdb_vnn_iface_string(vnn),
162 vnn->iface?vnn->iface->references:0));
164 vnn->iface->references--;
167 if (vnn->pnn == ctdb->pnn) {
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173 struct ctdb_vnn *vnn)
177 if (vnn->iface && vnn->iface->link_up) {
181 for (i=0; vnn->ifaces[i]; i++) {
182 struct ctdb_iface *cur;
184 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
197 struct ctdb_takeover_arp {
198 struct ctdb_context *ctdb;
201 struct ctdb_tcp_array *tcparray;
202 struct ctdb_vnn *vnn;
207 lists of tcp endpoints
209 struct ctdb_tcp_list {
210 struct ctdb_tcp_list *prev, *next;
211 struct ctdb_tcp_connection connection;
215 list of clients to kill on IP release
217 struct ctdb_client_ip {
218 struct ctdb_client_ip *prev, *next;
219 struct ctdb_context *ctdb;
226 send a gratuitous arp
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
229 struct timeval t, void *private_data)
231 struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
232 struct ctdb_takeover_arp);
234 struct ctdb_tcp_array *tcparray;
235 const char *iface = ctdb_vnn_iface_string(arp->vnn);
237 ret = ctdb_sys_send_arp(&arp->addr, iface);
239 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240 iface, strerror(errno)));
243 tcparray = arp->tcparray;
245 for (i=0;i<tcparray->num;i++) {
246 struct ctdb_tcp_connection *tcon;
248 tcon = &tcparray->connections[i];
249 DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250 (unsigned)ntohs(tcon->dst_addr.ip.sin_port),
251 ctdb_addr_to_str(&tcon->src_addr),
252 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253 ret = ctdb_sys_send_tcp(
258 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259 ctdb_addr_to_str(&tcon->src_addr)));
266 if (arp->count == CTDB_ARP_REPEAT) {
271 event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx,
272 timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
273 ctdb_control_send_arp, arp);
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277 struct ctdb_vnn *vnn)
279 struct ctdb_takeover_arp *arp;
280 struct ctdb_tcp_array *tcparray;
282 if (!vnn->takeover_ctx) {
283 vnn->takeover_ctx = talloc_new(vnn);
284 if (!vnn->takeover_ctx) {
289 arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
295 arp->addr = vnn->public_address;
298 tcparray = vnn->tcp_array;
300 /* add all of the known tcp connections for this IP to the
301 list of tcp connections to send tickle acks for */
302 arp->tcparray = talloc_steal(arp, tcparray);
304 vnn->tcp_array = NULL;
305 vnn->tcp_update_needed = true;
308 event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309 timeval_zero(), ctdb_control_send_arp, arp);
314 struct takeover_callback_state {
315 struct ctdb_req_control *c;
316 ctdb_sock_addr *addr;
317 struct ctdb_vnn *vnn;
320 struct ctdb_do_takeip_state {
321 struct ctdb_req_control *c;
322 struct ctdb_vnn *vnn;
326 called when takeip event finishes
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
331 struct ctdb_do_takeip_state *state =
332 talloc_get_type(private_data, struct ctdb_do_takeip_state);
337 if (status == -ETIME) {
340 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341 ctdb_addr_to_str(&state->vnn->public_address),
342 ctdb_vnn_iface_string(state->vnn)));
343 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
348 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
350 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355 data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356 data.dsize = strlen((char *)data.dptr) + 1;
357 DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
359 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
362 /* the control succeeded */
363 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
369 take over an ip address
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372 struct ctdb_req_control *c,
373 struct ctdb_vnn *vnn)
376 struct ctdb_do_takeip_state *state;
378 ret = ctdb_vnn_assign_iface(ctdb, vnn);
380 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381 "assin a usable interface\n",
382 ctdb_addr_to_str(&vnn->public_address),
383 vnn->public_netmask_bits));
387 state = talloc(vnn, struct ctdb_do_takeip_state);
388 CTDB_NO_MEMORY(ctdb, state);
390 state->c = talloc_steal(ctdb, c);
393 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394 ctdb_addr_to_str(&vnn->public_address),
395 vnn->public_netmask_bits,
396 ctdb_vnn_iface_string(vnn)));
398 ret = ctdb_event_script_callback(ctdb,
400 ctdb_do_takeip_callback,
405 ctdb_vnn_iface_string(vnn),
406 ctdb_addr_to_str(&vnn->public_address),
407 vnn->public_netmask_bits);
410 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411 ctdb_addr_to_str(&vnn->public_address),
412 ctdb_vnn_iface_string(vnn)));
420 struct ctdb_do_updateip_state {
421 struct ctdb_req_control *c;
422 struct ctdb_iface *old;
423 struct ctdb_vnn *vnn;
427 called when updateip event finishes
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
432 struct ctdb_do_updateip_state *state =
433 talloc_get_type(private_data, struct ctdb_do_updateip_state);
437 if (status == -ETIME) {
440 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441 ctdb_addr_to_str(&state->vnn->public_address),
443 ctdb_vnn_iface_string(state->vnn)));
446 * All we can do is reset the old interface
447 * and let the next run fix it
449 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450 state->vnn->iface = state->old;
451 state->vnn->iface->references++;
453 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458 ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
460 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465 /* the control succeeded */
466 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
472 update (move) an ip address
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475 struct ctdb_req_control *c,
476 struct ctdb_vnn *vnn)
479 struct ctdb_do_updateip_state *state;
480 struct ctdb_iface *old = vnn->iface;
482 ctdb_vnn_unassign_iface(ctdb, vnn);
483 ret = ctdb_vnn_assign_iface(ctdb, vnn);
485 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486 "assin a usable interface (old iface '%s')\n",
487 ctdb_addr_to_str(&vnn->public_address),
488 vnn->public_netmask_bits,
493 state = talloc(vnn, struct ctdb_do_updateip_state);
494 CTDB_NO_MEMORY(ctdb, state);
496 state->c = talloc_steal(ctdb, c);
500 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501 "interface %s to %s\n",
502 ctdb_addr_to_str(&vnn->public_address),
503 vnn->public_netmask_bits,
505 ctdb_vnn_iface_string(vnn)));
507 ret = ctdb_event_script_callback(ctdb,
509 ctdb_do_updateip_callback,
512 CTDB_EVENT_UPDATE_IP,
515 ctdb_vnn_iface_string(vnn),
516 ctdb_addr_to_str(&vnn->public_address),
517 vnn->public_netmask_bits);
519 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520 ctdb_addr_to_str(&vnn->public_address),
521 old->name, ctdb_vnn_iface_string(vnn)));
530 Find the vnn of the node that has a public ip address
531 returns -1 if the address is not known as a public address
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 struct ctdb_vnn *vnn;
537 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538 if (ctdb_same_ip(&vnn->public_address, addr)) {
547 take over an ip address
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550 struct ctdb_req_control *c,
555 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556 struct ctdb_vnn *vnn;
557 bool have_ip = false;
558 bool do_updateip = false;
559 bool do_takeip = false;
560 struct ctdb_iface *best_iface = NULL;
562 if (pip->pnn != ctdb->pnn) {
563 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564 "with pnn %d, but we're node %d\n",
565 ctdb_addr_to_str(&pip->addr),
566 pip->pnn, ctdb->pnn));
570 /* update our vnn list */
571 vnn = find_public_ip_vnn(ctdb, &pip->addr);
573 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574 ctdb_addr_to_str(&pip->addr)));
578 have_ip = ctdb_sys_have_ip(&pip->addr);
579 best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580 if (best_iface == NULL) {
581 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582 "a usable interface (old %s, have_ip %d)\n",
583 ctdb_addr_to_str(&vnn->public_address),
584 vnn->public_netmask_bits,
585 ctdb_vnn_iface_string(vnn),
590 if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
595 if (vnn->iface == NULL && have_ip) {
596 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597 "but we have no interface assigned, has someone manually configured it?"
599 ctdb_addr_to_str(&vnn->public_address)));
604 if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
605 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606 "and we have it on iface[%s], but it was assigned to node %d"
607 "and we are node %d, banning ourself\n",
608 ctdb_addr_to_str(&vnn->public_address),
609 ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
614 if (vnn->pnn == -1 && have_ip) {
615 vnn->pnn = ctdb->pnn;
616 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
617 "and we already have it on iface[%s], update local daemon\n",
618 ctdb_addr_to_str(&vnn->public_address),
619 ctdb_vnn_iface_string(vnn)));
624 if (vnn->iface->link_up) {
625 /* only move when the rebalance gains something */
626 if (vnn->iface->references > (best_iface->references + 1)) {
629 } else if (vnn->iface != best_iface) {
636 ctdb_vnn_unassign_iface(ctdb, vnn);
643 ret = ctdb_do_takeip(ctdb, c, vnn);
647 } else if (do_updateip) {
648 ret = ctdb_do_updateip(ctdb, c, vnn);
654 * The interface is up and the kernel knows the ip
657 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658 ctdb_addr_to_str(&pip->addr),
659 vnn->public_netmask_bits,
660 ctdb_vnn_iface_string(vnn)));
664 /* tell ctdb_control.c that we will be replying asynchronously */
671 takeover an ip address old v4 style
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb,
674 struct ctdb_req_control *c,
680 data.dsize = sizeof(struct ctdb_public_ip);
681 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682 CTDB_NO_MEMORY(ctdb, data.dptr);
684 memcpy(data.dptr, indata.dptr, indata.dsize);
685 return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
689 kill any clients that are registered with an IP that is being released
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
693 struct ctdb_client_ip *ip;
695 DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696 ctdb_addr_to_str(addr)));
698 for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699 ctdb_sock_addr tmp_addr;
702 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
704 ctdb_addr_to_str(&ip->addr)));
706 if (ctdb_same_ip(&tmp_addr, addr)) {
707 struct ctdb_client *client = ctdb_reqid_find(ctdb,
710 DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
712 ctdb_addr_to_str(&ip->addr),
715 if (client->pid != 0) {
716 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717 (unsigned)client->pid,
718 ctdb_addr_to_str(addr),
720 kill(client->pid, SIGKILL);
727 called when releaseip event finishes
729 static void release_ip_callback(struct ctdb_context *ctdb, int status,
732 struct takeover_callback_state *state =
733 talloc_get_type(private_data, struct takeover_callback_state);
736 if (status == -ETIME) {
740 /* send a message to all clients of this node telling them
741 that the cluster has been reconfigured and they should
742 release any sockets on this IP */
743 data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744 CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745 data.dsize = strlen((char *)data.dptr)+1;
747 DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
749 ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
751 /* kill clients that have registered with this IP */
752 release_kill_clients(ctdb, state->addr);
754 ctdb_vnn_unassign_iface(ctdb, state->vnn);
756 /* the control succeeded */
757 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
762 release an ip address
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
765 struct ctdb_req_control *c,
770 struct takeover_callback_state *state;
771 struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772 struct ctdb_vnn *vnn;
774 /* update our vnn list */
775 vnn = find_public_ip_vnn(ctdb, &pip->addr);
777 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778 ctdb_addr_to_str(&pip->addr)));
783 /* stop any previous arps */
784 talloc_free(vnn->takeover_ctx);
785 vnn->takeover_ctx = NULL;
787 if (!ctdb_sys_have_ip(&pip->addr)) {
788 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
789 ctdb_addr_to_str(&pip->addr),
790 vnn->public_netmask_bits,
791 ctdb_vnn_iface_string(vnn)));
792 ctdb_vnn_unassign_iface(ctdb, vnn);
796 if (vnn->iface == NULL) {
797 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798 "but we have no interface assigned, has someone manually configured it?"
800 ctdb_addr_to_str(&vnn->public_address)));
805 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
806 ctdb_addr_to_str(&pip->addr),
807 vnn->public_netmask_bits,
808 ctdb_vnn_iface_string(vnn),
811 state = talloc(ctdb, struct takeover_callback_state);
812 CTDB_NO_MEMORY(ctdb, state);
814 state->c = talloc_steal(state, c);
815 state->addr = talloc(state, ctdb_sock_addr);
816 CTDB_NO_MEMORY(ctdb, state->addr);
817 *state->addr = pip->addr;
820 ret = ctdb_event_script_callback(ctdb,
821 state, release_ip_callback, state,
823 CTDB_EVENT_RELEASE_IP,
825 ctdb_vnn_iface_string(vnn),
826 ctdb_addr_to_str(&pip->addr),
827 vnn->public_netmask_bits);
829 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830 ctdb_addr_to_str(&pip->addr),
831 ctdb_vnn_iface_string(vnn)));
836 /* tell the control that we will be replying asynchronously */
842 release an ip address old v4 style
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb,
845 struct ctdb_req_control *c,
851 data.dsize = sizeof(struct ctdb_public_ip);
852 data.dptr = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853 CTDB_NO_MEMORY(ctdb, data.dptr);
855 memcpy(data.dptr, indata.dptr, indata.dsize);
856 return ctdb_control_release_ip(ctdb, c, data, async_reply);
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861 ctdb_sock_addr *addr,
862 unsigned mask, const char *ifaces)
864 struct ctdb_vnn *vnn;
871 /* Verify that we don't have an entry for this ip yet */
872 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874 DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
875 ctdb_addr_to_str(addr)));
880 /* create a new vnn structure for this ip address */
881 vnn = talloc_zero(ctdb, struct ctdb_vnn);
882 CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883 vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884 tmp = talloc_strdup(vnn, ifaces);
885 CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886 for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
894 vnn->ifaces[num] = NULL;
895 vnn->public_address = *addr;
896 vnn->public_netmask_bits = mask;
898 if (ctdb_sys_have_ip(addr)) {
899 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
900 vnn->pnn = ctdb->pnn;
903 for (i=0; vnn->ifaces[i]; i++) {
904 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
906 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
907 "for public_address[%s]\n",
908 vnn->ifaces[i], ctdb_addr_to_str(addr)));
913 vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
917 DLIST_ADD(ctdb->vnn, vnn);
923 setup the event script directory
925 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
927 ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
928 CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
933 setup the public address lists from a file
935 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
941 lines = file_lines_load(alist, &nlines, ctdb);
943 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
946 while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
950 for (i=0;i<nlines;i++) {
958 while ((*line == ' ') || (*line == '\t')) {
964 if (strcmp(line, "") == 0) {
967 tok = strtok(line, " \t");
969 tok = strtok(NULL, " \t");
971 if (NULL == ctdb->default_public_interface) {
972 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
977 ifaces = ctdb->default_public_interface;
982 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
983 DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
987 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
988 DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
998 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1002 struct ctdb_vnn *svnn;
1003 struct ctdb_iface *cur = NULL;
1007 svnn = talloc_zero(ctdb, struct ctdb_vnn);
1008 CTDB_NO_MEMORY(ctdb, svnn);
1010 svnn->ifaces = talloc_array(svnn, const char *, 2);
1011 CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1012 svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1013 CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1014 svnn->ifaces[1] = NULL;
1016 ok = parse_ip(ip, iface, 0, &svnn->public_address);
1022 ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1024 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1025 "for single_ip[%s]\n",
1027 ctdb_addr_to_str(&svnn->public_address)));
1032 /* assume the single public ip interface is initially "good" */
1033 cur = ctdb_find_iface(ctdb, iface);
1035 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1038 cur->link_up = true;
1040 ret = ctdb_vnn_assign_iface(ctdb, svnn);
1046 ctdb->single_ip_vnn = svnn;
1050 struct ctdb_public_ip_list {
1051 struct ctdb_public_ip_list *next;
1053 ctdb_sock_addr addr;
1057 /* Given a physical node, return the number of
1058 public addresses that are currently assigned to this node.
1060 static int node_ip_coverage(struct ctdb_context *ctdb,
1062 struct ctdb_public_ip_list *ips)
1066 for (;ips;ips=ips->next) {
1067 if (ips->pnn == pnn) {
1075 /* Check if this is a public ip known to the node, i.e. can that
1076 node takeover this ip ?
1078 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
1079 struct ctdb_public_ip_list *ip)
1081 struct ctdb_all_public_ips *public_ips;
1084 public_ips = ctdb->nodes[pnn]->available_public_ips;
1086 if (public_ips == NULL) {
1090 for (i=0;i<public_ips->num;i++) {
1091 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1092 /* yes, this node can serve this public ip */
1101 /* search the node list for a node to take over this ip.
1102 pick the node that is currently serving the fewest ips
1103 so that the ips get spread out evenly.
1105 static int find_takeover_node(struct ctdb_context *ctdb,
1106 struct ctdb_node_map *nodemap, uint32_t mask,
1107 struct ctdb_public_ip_list *ip,
1108 struct ctdb_public_ip_list *all_ips)
1110 int pnn, min=0, num;
1114 for (i=0;i<nodemap->num;i++) {
1115 if (nodemap->nodes[i].flags & mask) {
1116 /* This node is not healthy and can not be used to serve
1122 /* verify that this node can serve this ip */
1123 if (can_node_serve_ip(ctdb, i, ip)) {
1124 /* no it couldn't so skip to the next node */
1128 num = node_ip_coverage(ctdb, i, all_ips);
1129 /* was this the first node we checked ? */
1141 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1142 ctdb_addr_to_str(&ip->addr)));
1152 static uint32_t *ip_key(ctdb_sock_addr *ip)
1154 static uint32_t key[IP_KEYLEN];
1156 bzero(key, sizeof(key));
1158 switch (ip->sa.sa_family) {
1160 key[3] = htonl(ip->ip.sin_addr.s_addr);
1163 key[0] = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1164 key[1] = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1165 key[2] = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1166 key[3] = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1169 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1176 static void *add_ip_callback(void *parm, void *data)
1178 struct ctdb_public_ip_list *this_ip = parm;
1179 struct ctdb_public_ip_list *prev_ip = data;
1181 if (prev_ip == NULL) {
1184 if (this_ip->pnn == -1) {
1185 this_ip->pnn = prev_ip->pnn;
1191 void getips_count_callback(void *param, void *data)
1193 struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1194 struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1196 new_ip->next = *ip_list;
1200 static struct ctdb_public_ip_list *
1201 create_merged_ip_list(struct ctdb_context *ctdb)
1204 struct ctdb_public_ip_list *ip_list;
1205 struct ctdb_all_public_ips *public_ips;
1207 if (ctdb->ip_tree != NULL) {
1208 talloc_free(ctdb->ip_tree);
1209 ctdb->ip_tree = NULL;
1211 ctdb->ip_tree = trbt_create(ctdb, 0);
1213 for (i=0;i<ctdb->num_nodes;i++) {
1214 public_ips = ctdb->nodes[i]->known_public_ips;
1216 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1220 /* there were no public ips for this node */
1221 if (public_ips == NULL) {
1225 for (j=0;j<public_ips->num;j++) {
1226 struct ctdb_public_ip_list *tmp_ip;
1228 tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1229 CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1230 tmp_ip->pnn = public_ips->ips[j].pnn;
1231 tmp_ip->addr = public_ips->ips[j].addr;
1232 tmp_ip->next = NULL;
1234 trbt_insertarray32_callback(ctdb->ip_tree,
1235 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1242 trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1248 make any IP alias changes for public addresses that are necessary
1250 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1252 int i, num_healthy, retries;
1253 struct ctdb_public_ip ip;
1254 struct ctdb_public_ipv4 ipv4;
1255 uint32_t mask, *nodes;
1256 struct ctdb_public_ip_list *all_ips, *tmp_ip;
1257 int maxnode, maxnum=0, minnode, minnum=0, num;
1259 struct timeval timeout;
1260 struct client_async_data *async_data;
1261 struct ctdb_client_control_state *state;
1262 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1265 * ip failover is completely disabled, just send out the
1266 * ipreallocated event.
1268 if (ctdb->tunable.disable_ip_failover != 0) {
1274 /* Count how many completely healthy nodes we have */
1276 for (i=0;i<nodemap->num;i++) {
1277 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1282 if (num_healthy > 0) {
1283 /* We have healthy nodes, so only consider them for
1284 serving public addresses
1286 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1288 /* We didn't have any completely healthy nodes so
1289 use "disabled" nodes as a fallback
1291 mask = NODE_FLAGS_INACTIVE;
1294 /* since nodes only know about those public addresses that
1295 can be served by that particular node, no single node has
1296 a full list of all public addresses that exist in the cluster.
1297 Walk over all node structures and create a merged list of
1298 all public addresses that exist in the cluster.
1300 keep the tree of ips around as ctdb->ip_tree
1302 all_ips = create_merged_ip_list(ctdb);
1304 /* If we want deterministic ip allocations, i.e. that the ip addresses
1305 will always be allocated the same way for a specific set of
1306 available/unavailable nodes.
1308 if (1 == ctdb->tunable.deterministic_public_ips) {
1309 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1310 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1311 tmp_ip->pnn = i%nodemap->num;
1316 /* mark all public addresses with a masked node as being served by
1319 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1320 if (tmp_ip->pnn == -1) {
1323 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1328 /* verify that the assigned nodes can serve that public ip
1329 and set it to -1 if not
1331 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1332 if (tmp_ip->pnn == -1) {
1335 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1336 /* this node can not serve this ip. */
1342 /* now we must redistribute all public addresses with takeover node
1343 -1 among the nodes available
1347 /* loop over all ip's and find a physical node to cover for
1350 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1351 if (tmp_ip->pnn == -1) {
1352 if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1353 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1354 ctdb_addr_to_str(&tmp_ip->addr)));
1359 /* If we don't want ips to fail back after a node becomes healthy
1360 again, we won't even try to reallocate the ip addresses so that
1361 they are evenly spread out.
1362 This can NOT be used at the same time as DeterministicIPs !
1364 if (1 == ctdb->tunable.no_ip_failback) {
1365 if (1 == ctdb->tunable.deterministic_public_ips) {
1366 DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1372 /* now, try to make sure the ip addresses are evenly distributed
1374 for each ip address, loop over all nodes that can serve this
1375 ip and make sure that the difference between the node
1376 serving the most and the node serving the least ip's are not greater
1379 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1380 if (tmp_ip->pnn == -1) {
1384 /* Get the highest and lowest number of ips served by any
1385 valid node which can serve this ip.
1389 for (i=0;i<nodemap->num;i++) {
1390 if (nodemap->nodes[i].flags & mask) {
1394 /* only check nodes that can actually serve this ip */
1395 if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1396 /* no it couldn't so skip to the next node */
1400 num = node_ip_coverage(ctdb, i, all_ips);
1401 if (maxnode == -1) {
1410 if (minnode == -1) {
1420 if (maxnode == -1) {
1421 DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1422 ctdb_addr_to_str(&tmp_ip->addr)));
1427 /* If we want deterministic IPs then don't try to reallocate
1428 them to spread out the load.
1430 if (1 == ctdb->tunable.deterministic_public_ips) {
1434 /* if the spread between the smallest and largest coverage by
1435 a node is >=2 we steal one of the ips from the node with
1436 most coverage to even things out a bit.
1437 try to do this at most 5 times since we don't want to spend
1438 too much time balancing the ip coverage.
1440 if ( (maxnum > minnum+1)
1442 struct ctdb_public_ip_list *tmp;
1444 /* mark one of maxnode's vnn's as unassigned and try
1447 for (tmp=all_ips;tmp;tmp=tmp->next) {
1448 if (tmp->pnn == maxnode) {
1458 /* finished distributing the public addresses, now just send the
1459 info out to the nodes
1463 /* at this point ->pnn is the node which will own each IP
1464 or -1 if there is no node that can cover this ip
1467 /* now tell all nodes to delete any alias that they should not
1468 have. This will be a NOOP on nodes that don't currently
1469 hold the given alias */
1470 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1471 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1473 for (i=0;i<nodemap->num;i++) {
1474 /* don't talk to unconnected nodes, but do talk to banned nodes */
1475 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1479 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1480 if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1481 /* This node should be serving this
1482 vnn so don't tell it to release the ip
1486 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1487 ipv4.pnn = tmp_ip->pnn;
1488 ipv4.sin = tmp_ip->addr.ip;
1490 timeout = TAKEOVER_TIMEOUT();
1491 data.dsize = sizeof(ipv4);
1492 data.dptr = (uint8_t *)&ipv4;
1493 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1494 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1498 ip.pnn = tmp_ip->pnn;
1499 ip.addr = tmp_ip->addr;
1501 timeout = TAKEOVER_TIMEOUT();
1502 data.dsize = sizeof(ip);
1503 data.dptr = (uint8_t *)&ip;
1504 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1505 0, CTDB_CONTROL_RELEASE_IP, 0,
1510 if (state == NULL) {
1511 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1512 talloc_free(tmp_ctx);
1516 ctdb_client_async_add(async_data, state);
1519 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1520 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1521 talloc_free(tmp_ctx);
1524 talloc_free(async_data);
1527 /* tell all nodes to get their own IPs */
1528 async_data = talloc_zero(tmp_ctx, struct client_async_data);
1529 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1530 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1531 if (tmp_ip->pnn == -1) {
1532 /* this IP won't be taken over */
1536 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1537 ipv4.pnn = tmp_ip->pnn;
1538 ipv4.sin = tmp_ip->addr.ip;
1540 timeout = TAKEOVER_TIMEOUT();
1541 data.dsize = sizeof(ipv4);
1542 data.dptr = (uint8_t *)&ipv4;
1543 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1544 0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1548 ip.pnn = tmp_ip->pnn;
1549 ip.addr = tmp_ip->addr;
1551 timeout = TAKEOVER_TIMEOUT();
1552 data.dsize = sizeof(ip);
1553 data.dptr = (uint8_t *)&ip;
1554 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1555 0, CTDB_CONTROL_TAKEOVER_IP, 0,
1559 if (state == NULL) {
1560 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1561 talloc_free(tmp_ctx);
1565 ctdb_client_async_add(async_data, state);
1567 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1568 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1569 talloc_free(tmp_ctx);
1574 /* tell all nodes to update natwg */
1575 /* send the flags update natgw on all connected nodes */
1576 data.dptr = discard_const("ipreallocated");
1577 data.dsize = strlen((char *)data.dptr) + 1;
1578 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1579 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1580 nodes, 0, TAKEOVER_TIMEOUT(),
1584 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1587 talloc_free(tmp_ctx);
1593 destroy a ctdb_client_ip structure
/*
  talloc destructor for a struct ctdb_client_ip (it is registered with
  talloc_set_destructor() in ctdb_control_tcp_client).
  Logs the address that is going away at DEBUG_DEBUG level and unlinks the
  entry from ip->ctdb->client_ip_list.
  NOTE(review): the port is always read through the ipv4 view
  (addr.ip.sin_port), also when the address is ipv6 - confirm this is
  intended.
  NOTE(review): short source lines (braces, the last DEBUG argument and the
  return statement) are missing from this listing.
 */
1595 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1597 DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1598 ctdb_addr_to_str(&ip->addr),
1599 ntohs(ip->addr.ip.sin_port),
1602 DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1607 called by a client to inform us of a TCP connection that it is managing
1608 that should be tickled with an ACK when IP takeover is done
1609 we handle both the old ipv4 style of packets as well as the new ipv4/6
/*
  Control handler: a local client registers a TCP connection it manages.

  indata must be exactly sizeof(struct ctdb_control_tcp) (old ipv4-only
  layout, which is copied into a zeroed struct ctdb_control_tcp_addr) or
  sizeof(struct ctdb_control_tcp_addr); any other size is logged as an
  error.

  Both endpoints are passed through ctdb_canonicalize_ip(), the destination
  is looked up with find_public_ip_vnn(), and an error is logged (the
  adjacent comment says the call is failed) when vnn->pnn differs from
  ctdb->pnn.  Otherwise a ctdb_client_ip and a ctdb_tcp_list entry are
  allocated as talloc children of the client, linked into
  ctdb->client_ip_list and client->tcp_list, and the connection is
  broadcast to all connected nodes with CTDB_CONTROL_TCP_ADD using
  CTDB_CTRL_FLAG_NOREPLY.

  NOTE(review): no NULL check of client is visible before client->pid and
  client->tcp_list are used - confirm ctdb_reqid_find() cannot fail here.
  NOTE(review): short source lines (braces, returns, case labels, short
  assignments) are missing from this listing, so the exact return values
  and the guards around the switch statements cannot be confirmed here.
 */
1612 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1615 struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1616 struct ctdb_control_tcp *old_addr = NULL;
1617 struct ctdb_control_tcp_addr new_addr;
1618 struct ctdb_control_tcp_addr *tcp_sock = NULL;
1619 struct ctdb_tcp_list *tcp;
1620 struct ctdb_tcp_connection t;
1623 struct ctdb_client_ip *ip;
1624 struct ctdb_vnn *vnn;
1625 ctdb_sock_addr addr;
/* the payload size selects between the old and the new wire layout */
1627 switch (indata.dsize) {
1628 case sizeof(struct ctdb_control_tcp):
/* old layout: copy the two ipv4 endpoints into a zeroed new-style struct */
1629 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1630 ZERO_STRUCT(new_addr);
1631 tcp_sock = &new_addr;
1632 tcp_sock->src.ip = old_addr->src;
1633 tcp_sock->dest.ip = old_addr->dest;
1635 case sizeof(struct ctdb_control_tcp_addr):
1636 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1639 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1640 "to ctdb_control_tcp_client. size was %d but "
1641 "only allowed sizes are %lu and %lu\n",
1643 (long unsigned)sizeof(struct ctdb_control_tcp),
1644 (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
/* canonicalize both endpoints in place; addr holds a copy of the input */
1648 addr = tcp_sock->src;
1649 ctdb_canonicalize_ip(&addr, &tcp_sock->src);
1650 addr = tcp_sock->dest;
1651 ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
/* from here on addr is the (canonicalized) destination address */
1654 memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1655 vnn = find_public_ip_vnn(ctdb, &addr);
1657 switch (addr.sa.sa_family) {
/* ipv4 loopback destinations are not reported as an error */
1659 if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1660 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
1661 ctdb_addr_to_str(&addr)));
1665 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
1666 ctdb_addr_to_str(&addr)));
1669 DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1675 if (vnn->pnn != ctdb->pnn) {
1676 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1677 ctdb_addr_to_str(&addr),
1678 client_id, client->pid));
1679 /* failing this call will tell smbd to die */
/* the ip entry is a talloc child of the client */
1683 ip = talloc(client, struct ctdb_client_ip);
1684 CTDB_NO_MEMORY(ctdb, ip);
1688 ip->client_id = client_id;
1689 talloc_set_destructor(ip, ctdb_client_ip_destructor);
1690 DLIST_ADD(ctdb->client_ip_list, ip);
1692 tcp = talloc(client, struct ctdb_tcp_list);
1693 CTDB_NO_MEMORY(ctdb, tcp);
1695 tcp->connection.src_addr = tcp_sock->src;
1696 tcp->connection.dst_addr = tcp_sock->dest;
1698 DLIST_ADD(client->tcp_list, tcp);
/* t is the payload of the TCP_ADD broadcast sent below */
1700 t.src_addr = tcp_sock->src;
1701 t.dst_addr = tcp_sock->dest;
1703 data.dptr = (uint8_t *)&t;
1704 data.dsize = sizeof(t);
1706 switch (addr.sa.sa_family) {
1708 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1709 (unsigned)ntohs(tcp_sock->dest.ip.sin_port),
1710 ctdb_addr_to_str(&tcp_sock->src),
1711 (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1714 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1715 (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port),
1716 ctdb_addr_to_str(&tcp_sock->src),
1717 (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1720 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1724 /* tell all nodes about this tcp connection */
1725 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
1726 CTDB_CONTROL_TCP_ADD,
1727 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1729 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1737 find a tcp address on a list
/*
  Linear search of a tickle array for a connection.
  An entry matches when ctdb_same_sockaddr() is true for both its src_addr
  and its dst_addr compared with *tcp; the address of the matching element
  inside array->connections is returned, so the pointer is only valid as
  long as that array is.
  A NULL array is handled explicitly; the value returned in that case and
  when nothing matches is on lines missing from this listing (presumably
  NULL, since callers compare the result against NULL).
 */
1739 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1740 struct ctdb_tcp_connection *tcp)
1744 if (array == NULL) {
1748 for (i=0;i<array->num;i++) {
1749 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1750 ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1751 return &array->connections[i];
1760 called by a daemon to inform us of a TCP connection that one of its
1761 clients managing that should tickled with an ACK when IP takeover is
/*
  Control handler: record a TCP connection (tickle) for a public address.

  indata.dptr is interpreted as a struct ctdb_tcp_connection.  The vnn is
  looked up from the destination address; a non-public destination is only
  logged at DEBUG_INFO level.

  If the vnn has no tickle array yet, one is allocated (talloc parent is
  ctdb->nodes) together with a one element connections array, and the
  connection is stored in it.  Otherwise the connection is looked up with
  ctdb_tcp_find(); a duplicate is only logged, a new one is appended after
  growing the connections array with talloc_realloc().

  tcp_update_needed: when true, vnn->tcp_update_needed is set after a
  connection has been stored.

  NOTE(review): no check of indata.dsize is visible before p is
  dereferenced - confirm the caller validates the size.
  NOTE(review): the talloc_realloc() result is assigned straight to
  tcparray->connections; if it returns NULL the only pointer to the old
  array is overwritten while the element count is unchanged - confirm this
  failure path.
  NOTE(review): the first allocation reserves space for one connection
  after the header although connections is allocated separately - looks
  redundant, verify.
  NOTE(review): short source lines (braces, returns, the num updates and
  the realloc element count) are missing from this listing.
 */
1764 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1766 struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1767 struct ctdb_tcp_array *tcparray;
1768 struct ctdb_tcp_connection tcp;
1769 struct ctdb_vnn *vnn;
1771 vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1773 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1774 ctdb_addr_to_str(&p->dst_addr)));
1780 tcparray = vnn->tcp_array;
1782 /* If this is the first tickle */
1783 if (tcparray == NULL) {
1784 tcparray = talloc_size(ctdb->nodes,
1785 offsetof(struct ctdb_tcp_array, connections) +
1786 sizeof(struct ctdb_tcp_connection) * 1);
1787 CTDB_NO_MEMORY(ctdb, tcparray);
1788 vnn->tcp_array = tcparray;
/* the connections array is a separate talloc child of tcparray */
1791 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1792 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1794 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1795 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1798 if (tcp_update_needed) {
1799 vnn->tcp_update_needed = true;
1805 /* Do we already have this tickle ?*/
1806 tcp.src_addr = p->src_addr;
1807 tcp.dst_addr = p->dst_addr;
1808 if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1809 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1810 ctdb_addr_to_str(&tcp.dst_addr),
1811 ntohs(tcp.dst_addr.ip.sin_port),
1816 /* A new tickle, we must add it to the array */
1817 tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1818 struct ctdb_tcp_connection,
1820 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1822 vnn->tcp_array = tcparray;
1823 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1824 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1827 DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1828 ctdb_addr_to_str(&tcp.dst_addr),
1829 ntohs(tcp.dst_addr.ip.sin_port),
1832 if (tcp_update_needed) {
1833 vnn->tcp_update_needed = true;
1841 called by a daemon to inform us of a TCP connection that one of its
1842 clients managing that should tickled with an ACK when IP takeover is
/*
  Remove one connection from the tickle array of the public address it was
  connected to.

  The vnn is found from conn->dst_addr.  Nothing is removed when the
  address is not public (logged at DEBUG_ERR), when the vnn has no tickle
  array, or when ctdb_tcp_find() does not know the connection (both logged
  at DEBUG_INFO).

  Removal overwrites the found slot with the last element and decrements
  num; the connections array itself is not shrunk.  When num reaches zero
  the whole array is freed and vnn->tcp_array is reset to NULL.  In the
  visible lines vnn->tcp_update_needed is then set to true.

  NOTE(review): the final "Removed tickle info" message prints
  conn->src_addr while the other messages print conn->dst_addr - confirm
  this is intended.
  NOTE(review): short source lines (braces, returns, the NULL tests) are
  missing from this listing.
 */
1845 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1847 struct ctdb_tcp_connection *tcpp;
1848 struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1851 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1852 ctdb_addr_to_str(&conn->dst_addr)));
1856 /* if the array is empty we cant remove it
1857 and we dont need to do anything
1859 if (vnn->tcp_array == NULL) {
1860 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1861 ctdb_addr_to_str(&conn->dst_addr),
1862 ntohs(conn->dst_addr.ip.sin_port)));
1867 /* See if we know this connection
1868 if we dont know this connection then we dont need to do anything
1870 tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1872 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1873 ctdb_addr_to_str(&conn->dst_addr),
1874 ntohs(conn->dst_addr.ip.sin_port)));
1879 /* We need to remove this entry from the array.
1880 Instead of allocating a new array and copying data to it
1881 we cheat and just copy the last entry in the existing array
1882 to the entry that is to be removed and just shring the
1885 *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1886 vnn->tcp_array->num--;
1888 /* If we deleted the last entry we also need to remove the entire array
1890 if (vnn->tcp_array->num == 0) {
1891 talloc_free(vnn->tcp_array);
1892 vnn->tcp_array = NULL;
1895 vnn->tcp_update_needed = true;
1897 DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1898 ctdb_addr_to_str(&conn->src_addr),
1899 ntohs(conn->src_addr.ip.sin_port)));
1904 called by a daemon to inform us of a TCP connection that one of its
1905 clients used are no longer needed in the tickle database
/*
  Control handler: forget one TCP connection.
  indata.dptr is interpreted as a struct ctdb_tcp_connection and handed to
  ctdb_remove_tcp_connection().
  NOTE(review): no check of indata.dsize is visible - confirm the caller
  validates the payload size.  The return statement is on a line missing
  from this listing.
 */
1907 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1909 struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1911 ctdb_remove_tcp_connection(ctdb, conn);
1918 called when a daemon restarts - send all tickles for all public addresses
1919 we are serving immediately to the new node.
/*
  Control handler for the startup notification of a node.
  The only visible content is the XXX comment below, i.e. sending the
  tickles to the new node is not implemented in the visible lines.  The
  vnn parameter is not used in the visible lines; the return statement is
  on a line missing from this listing.
 */
1921 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
1923 /*XXX here we should send all tickes we are serving to the new node */
1929 called when a client structure goes away - hook to remove
1930 elements from the tcp_list in all daemons
/*
  Drop every TCP connection registered by a client.
  Repeatedly takes the head of client->tcp_list, unlinks it from the list
  and removes the connection from the tickle array through
  ctdb_remove_tcp_connection().  The list entries themselves are not freed
  in the visible lines (they are allocated as talloc children of the
  client in ctdb_control_tcp_client).
 */
1932 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1934 while (client->tcp_list) {
1935 struct ctdb_tcp_list *tcp = client->tcp_list;
1936 DLIST_REMOVE(client->tcp_list, tcp);
1937 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1943 release all IPs on shutdown
/*
  Walk all public addresses (ctdb->vnn) and release them.
  For an address that ctdb_sys_have_ip() reports as not present only the
  interface assignment is dropped with ctdb_vnn_unassign_iface().
  Further down the CTDB_EVENT_RELEASE_IP event script is run with the
  arguments "<iface> <ip> <netmask bits>", release_kill_clients() is
  called for the address and the interface assignment is dropped.
  NOTE(review): several short source lines between the two parts (and the
  braces) are missing from this listing, so the exact control flow between
  them cannot be confirmed here.
 */
1945 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1947 struct ctdb_vnn *vnn;
1949 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1950 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1951 ctdb_vnn_unassign_iface(ctdb, vnn);
1957 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1958 ctdb_vnn_iface_string(vnn),
1959 ctdb_addr_to_str(&vnn->public_address),
1960 vnn->public_netmask_bits);
1961 release_kill_clients(ctdb, &vnn->public_address);
1962 ctdb_vnn_unassign_iface(ctdb, vnn);
1968 get list of public IPs
/*
  Control handler: return the list of public addresses in *outdata.

  A zeroed struct ctdb_all_public_ips with room for every vnn is allocated
  with outdata as talloc parent.  Each vnn contributes its pnn and public
  address; when CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE is set in c->flags,
  vnns for which ctdb_vnn_available() is false are left out.
  outdata->dsize is computed from i, the fill index, so it can be smaller
  than the allocated size.

  NOTE(review): short source lines (braces, counters, the assignment of
  the element count in the reply, returns) are missing from this listing.
 */
1970 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
1971 struct ctdb_req_control *c, TDB_DATA *outdata)
1974 struct ctdb_all_public_ips *ips;
1975 struct ctdb_vnn *vnn;
1976 bool only_available = false;
1978 if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1979 only_available = true;
1982 /* count how many public ip structures we have */
1984 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
/* size the buffer for the worst case: every vnn is returned */
1988 len = offsetof(struct ctdb_all_public_ips, ips) +
1989 num*sizeof(struct ctdb_public_ip);
1990 ips = talloc_zero_size(outdata, len);
1991 CTDB_NO_MEMORY(ctdb, ips);
1994 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1995 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1998 ips->ips[i].pnn = vnn->pnn;
1999 ips->ips[i].addr = vnn->public_address;
/* the reported length is recomputed from the fill index i */
2003 len = offsetof(struct ctdb_all_public_ips, ips) +
2004 i*sizeof(struct ctdb_public_ip);
2006 outdata->dsize = len;
2007 outdata->dptr = (uint8_t *)ips;
2014 get list of public IPs, old ipv4 style. only returns ipv4 addresses
/*
  Control handler: return the public addresses in the old ipv4-only reply
  format (struct ctdb_all_public_ipsv4).

  Both loops test the address family against AF_INET; per the comment in
  front of this function only ipv4 addresses are returned.  The reply is
  allocated zeroed with outdata as talloc parent and each entry carries
  the pnn and the sockaddr_in of the public address.

  NOTE(review): short source lines (braces, continue statements, counters,
  returns) are missing from this listing.
 */
2016 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb,
2017 struct ctdb_req_control *c, TDB_DATA *outdata)
2020 struct ctdb_all_public_ipsv4 *ips;
2021 struct ctdb_vnn *vnn;
2023 /* count how many public ip structures we have */
2025 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2026 if (vnn->public_address.sa.sa_family != AF_INET) {
2032 len = offsetof(struct ctdb_all_public_ipsv4, ips) +
2033 num*sizeof(struct ctdb_public_ipv4);
2034 ips = talloc_zero_size(outdata, len);
2035 CTDB_NO_MEMORY(ctdb, ips);
/* the reply buffer is published before it is filled in below */
2037 outdata->dsize = len;
2038 outdata->dptr = (uint8_t *)ips;
2042 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2043 if (vnn->public_address.sa.sa_family != AF_INET) {
2046 ips->ips[i].pnn = vnn->pnn;
2047 ips->ips[i].sin = vnn->public_address.ip;
/*
  Control handler: return details about one public address.

  indata.dptr is interpreted as a ctdb_sock_addr.  The vnn is looked up
  with find_public_ip_vnn(); ctdb->single_ip_vnn is also considered when
  its public address matches.  An address that is neither is reported at
  DEBUG_ERR level.

  The reply (struct ctdb_control_public_ip_info, allocated zeroed with
  outdata as talloc parent) carries the address, the owning pnn, one entry
  per name in vnn->ifaces (name, link state, reference count) and
  active_idx, which is the index of the interface equal to vnn->iface and
  stays 0xFFFFFFFF when none matches.

  NOTE(review): strcpy() into info->ifaces[i].name is unbounded - confirm
  interface names are guaranteed to fit the destination field.
  NOTE(review): no check of indata.dsize is visible before addr is used.
  NOTE(review): short source lines (braces, NULL tests, counters, returns)
  are missing from this listing.
 */
2054 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2055 struct ctdb_req_control *c,
2060 ctdb_sock_addr *addr;
2061 struct ctdb_control_public_ip_info *info;
2062 struct ctdb_vnn *vnn;
2064 addr = (ctdb_sock_addr *)indata.dptr;
2066 vnn = find_public_ip_vnn(ctdb, addr);
2068 /* if it is not a public ip it could be our 'single ip' */
2069 if (ctdb->single_ip_vnn) {
2070 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2071 vnn = ctdb->single_ip_vnn;
2076 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2077 "'%s'not a public address\n",
2078 ctdb_addr_to_str(addr)));
2082 /* count how many public ip structures we have */
2084 for (;vnn->ifaces[num];) {
2088 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2089 num*sizeof(struct ctdb_control_iface_info);
2090 info = talloc_zero_size(outdata, len);
2091 CTDB_NO_MEMORY(ctdb, info);
2093 info->ip.addr = vnn->public_address;
2094 info->ip.pnn = vnn->pnn;
/* 0xFFFFFFFF is kept when no interface of the vnn equals vnn->iface */
2095 info->active_idx = 0xFFFFFFFF;
2097 for (i=0; vnn->ifaces[i]; i++) {
2098 struct ctdb_iface *cur;
2100 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2102 DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2106 if (vnn->iface == cur) {
2107 info->active_idx = i;
2109 strcpy(info->ifaces[i].name, cur->name);
2110 info->ifaces[i].link_state = cur->link_up;
2111 info->ifaces[i].references = cur->references;
2114 len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2115 i*sizeof(struct ctdb_control_iface_info);
2117 outdata->dsize = len;
2118 outdata->dptr = (uint8_t *)info;
/*
  Control handler: return all interfaces known to this daemon.

  Walks ctdb->ifaces twice: once to count, once to fill a zeroed
  struct ctdb_control_get_ifaces (allocated with outdata as talloc parent)
  with name, link state and reference count of every interface.  The reply
  length is recomputed from the fill index i.

  NOTE(review): strcpy() into ifaces->ifaces[i].name is unbounded - confirm
  interface names are guaranteed to fit the destination field.
  NOTE(review): short source lines (braces, counters, the assignment of the
  element count in the reply, returns) are missing from this listing.
 */
2123 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2124 struct ctdb_req_control *c,
2128 struct ctdb_control_get_ifaces *ifaces;
2129 struct ctdb_iface *cur;
2131 /* count how many public ip structures we have */
2133 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2137 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2138 num*sizeof(struct ctdb_control_iface_info);
2139 ifaces = talloc_zero_size(outdata, len);
2140 CTDB_NO_MEMORY(ctdb, ifaces);
2143 for (cur=ctdb->ifaces;cur;cur=cur->next) {
2144 strcpy(ifaces->ifaces[i].name, cur->name);
2145 ifaces->ifaces[i].link_state = cur->link_up;
2146 ifaces->ifaces[i].references = cur->references;
2150 len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2151 i*sizeof(struct ctdb_control_iface_info);
2153 outdata->dsize = len;
2154 outdata->dptr = (uint8_t *)ifaces;
/*
  Control handler: set the link state of a known interface.

  indata.dptr is interpreted as a struct ctdb_control_iface_info.  The
  request is checked in this order: info->name must have a NUL byte at
  index CTDB_IFACE_SIZE, info->link_state must be one of the values
  accepted by the switch (the case labels are on lines missing from this
  listing), info->references must be 0, and the interface must be found by
  ctdb_find_iface().  Each failed check is logged at DEBUG_ERR level.

  When the requested state differs from iface->link_up the change is
  logged (DEBUG_ERR when the interface was up before, DEBUG_NOTICE
  otherwise) and iface->link_up is updated.

  NOTE(review): no check of indata.dsize is visible before info is used.
  NOTE(review): short source lines (braces, case labels, returns) are
  missing from this listing.
 */
2159 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2160 struct ctdb_req_control *c,
2163 struct ctdb_control_iface_info *info;
2164 struct ctdb_iface *iface;
2165 bool link_up = false;
2167 info = (struct ctdb_control_iface_info *)indata.dptr;
/* the name must be NUL terminated at its last position */
2169 if (info->name[CTDB_IFACE_SIZE] != '\0') {
2170 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2171 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2172 len, len, info->name));
2176 switch (info->link_state) {
2184 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2185 (unsigned int)info->link_state));
2189 if (info->references != 0) {
2190 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2191 (unsigned int)info->references));
2195 iface = ctdb_find_iface(ctdb, info->name);
2196 if (iface == NULL) {
2197 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
/* nothing to log or update when the state is unchanged */
2202 if (link_up == iface->link_up) {
2206 DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2207 ("iface[%s] has changed it's link status %s => %s\n",
2209 iface->link_up?"up":"down",
2210 link_up?"up":"down"));
2212 iface->link_up = link_up;
2218 structure containing the listening socket and the list of tcp connections
2219 that the ctdb daemon is to kill
/*
  Per public address state for killing TCP connections.
  Other code in this file also uses the members capture_fd and
  private_data; their declarations are on lines missing from this listing.
 */
2221 struct ctdb_kill_tcp {
/* public address this state belongs to; its killtcp pointer is reset by the destructor */
2222 struct ctdb_vnn *vnn;
2223 struct ctdb_context *ctdb;
/* fd event that runs capture_tcp_handler on the capture socket */
2225 struct fd_event *fde;
/* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
2226 trbt_tree_t *connections;
2231 a tcp connection that is to be killed
/*
  One TCP connection that is to be reset.
  Other code in this file also uses a count member (number of tickle
  attempts); its declaration is on a line missing from this listing.
 */
2233 struct ctdb_killtcp_con {
2234 ctdb_sock_addr src_addr;
2235 ctdb_sock_addr dst_addr;
/* back pointer to the owning kill state */
2237 struct ctdb_kill_tcp *killtcp;
2240 /* this function is used to create a key to represent this socketpair
2241 in the killtcp tree.
2242 this key is used to insert and lookup matching socketpairs that are
2243 to be tickled and RST
/* number of 32 bit words in a killtcp tree key */
2245 #define KILLTCP_KEYLEN 10
/*
  Build the tree key for a socket pair.

  The key lives in a function-local static array that is zeroed on every
  call, so a later call overwrites the previous key (presumably a pointer
  to that array is what gets returned - the return statements are on lines
  missing from this listing).

  ipv4 pairs fill words 0-3 (dst address, src address, dst port, src
  port); ipv6 pairs fill all ten words, interleaving the four address
  words of dst and src from the last word to the first, followed by the
  two ports.  Addresses and ports are copied as stored, without byte order
  conversion.

  A family mismatch between src and dst, or an unknown family, is logged
  at DEBUG_ERR level.
 */
2246 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2248 static uint32_t key[KILLTCP_KEYLEN];
2250 bzero(key, sizeof(key));
2252 if (src->sa.sa_family != dst->sa.sa_family) {
2253 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2257 switch (src->sa.sa_family) {
2259 key[0] = dst->ip.sin_addr.s_addr;
2260 key[1] = src->ip.sin_addr.s_addr;
2261 key[2] = dst->ip.sin_port;
2262 key[3] = src->ip.sin_port;
2265 key[0] = dst->ip6.sin6_addr.s6_addr32[3];
2266 key[1] = src->ip6.sin6_addr.s6_addr32[3];
2267 key[2] = dst->ip6.sin6_addr.s6_addr32[2];
2268 key[3] = src->ip6.sin6_addr.s6_addr32[2];
2269 key[4] = dst->ip6.sin6_addr.s6_addr32[1];
2270 key[5] = src->ip6.sin6_addr.s6_addr32[1];
2271 key[6] = dst->ip6.sin6_addr.s6_addr32[0];
2272 key[7] = src->ip6.sin6_addr.s6_addr32[0];
2273 key[8] = dst->ip6.sin6_port;
2274 key[9] = src->ip6.sin6_port;
2277 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2285 called when we get a read event on the raw socket
/*
  fd event handler for the capture socket of a struct ctdb_kill_tcp
  (private_data).

  Only read events are processed.  A packet is read with
  ctdb_sys_read_tcp_packet(), which also yields ack_seq and seq; the pair
  src/dst is looked up in killtcp->connections through
  killtcp_key(&src, &dst).  For a known connection a message is logged at
  DEBUG_INFO level and ctdb_sys_send_tcp() is called from con->dst_addr to
  con->src_addr with the captured ack_seq and seq and a last argument of 1
  (per the log message this sends a tcp reset).

  NOTE(review): short source lines (braces, returns, the src/dst output
  arguments of the read call, the NULL test and what happens to con
  afterwards) are missing from this listing.
 */
2287 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
2288 uint16_t flags, void *private_data)
2290 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2291 struct ctdb_killtcp_con *con;
2292 ctdb_sock_addr src, dst;
2293 uint32_t ack_seq, seq;
2295 if (!(flags & EVENT_FD_READ)) {
2299 if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2300 killtcp->private_data,
2302 &ack_seq, &seq) != 0) {
2303 /* probably a non-tcp ACK packet */
2307 /* check if we have this guy in our list of connections
2310 con = trbt_lookuparray32(killtcp->connections,
2311 KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2313 /* no this was some other packet we can just ignore */
2317 /* This one has been tickled !
2318 now reset him and remove him from the list.
2320 DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2321 ntohs(con->dst_addr.ip.sin_port),
2322 ctdb_addr_to_str(&con->src_addr),
2323 ntohs(con->src_addr.ip.sin_port)));
2325 ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2330 /* when traversing the list of all tcp connections to send tickle acks to
2331 (so that we can capture the ack coming back and kill the connection
2333 this callback is called for each connection we are currently trying to kill
/*
  Traverse callback used by ctdb_tickle_sentenced_connections for every
  connection in the killtcp tree.
  param: talloc context that collects connections to delete.
  data:  the struct ctdb_killtcp_con of the current tree node.
  Once con->count has reached 5 the connection is reparented onto param,
  so it is freed when the caller frees that context.  Otherwise the
  connection is tickled again from dst_addr to src_addr.
  NOTE(review): the line naming the function that sends the tickle, the
  update of con->count and the remaining arguments are missing from this
  listing.
 */
2335 static void tickle_connection_traverse(void *param, void *data)
2337 struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2339 /* have tried too many times, just give up */
2340 if (con->count >= 5) {
2341 /* can't delete in traverse: reparent to delete_cons */
2342 talloc_steal(param, con);
2346 /* othervise, try tickling it again */
2349 (ctdb_sock_addr *)&con->dst_addr,
2350 (ctdb_sock_addr *)&con->src_addr,
2356 called every second until all sentenced connections have been reset
/*
  Timed event handler; private_data is the struct ctdb_kill_tcp.

  Creates a temporary talloc context, traverses all connections with
  tickle_connection_traverse (which moves given-up connections onto that
  context) and frees the context after the traverse has finished.
  When the tree is gone or has no root the whole killtcp structure is
  freed; the final call re-arms this handler one second later with killtcp
  as talloc parent of the event.

  NOTE(review): short source lines (braces and the return after freeing
  killtcp) are missing from this listing.
 */
2358 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te,
2359 struct timeval t, void *private_data)
2361 struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2362 void *delete_cons = talloc_new(NULL);
2364 /* loop over all connections sending tickle ACKs */
2365 trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2367 /* now we've finished traverse, it's safe to do deletion. */
2368 talloc_free(delete_cons);
2370 /* If there are no more connections to kill we can remove the
2371 entire killtcp structure
2373 if ( (killtcp->connections == NULL) ||
2374 (killtcp->connections->root == NULL) ) {
2375 talloc_free(killtcp);
2379 /* try tickling them again in a seconds time
2381 event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2382 ctdb_tickle_sentenced_connections, killtcp);
2386 destroy the killtcp structure
/*
  talloc destructor of a struct ctdb_kill_tcp (installed in
  ctdb_killtcp_add_connection).  Clears the back reference
  killtcp->vnn->killtcp so the vnn no longer points at freed memory.
  NOTE(review): other short lines of the body, including the return
  statement, are missing from this listing.
 */
2388 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2391 killtcp->vnn->killtcp = NULL;
2397 /* nothing fancy here, just unconditionally replace any existing
2398 connection structure with the new one.
2400 dont even free the old one if it did exist, that one is talloc_stolen
2401 by the same node in the tree anyway and will be deleted when the new data
/*
  Callback handed to trbt_insertarray32_callback() by
  ctdb_killtcp_add_connection, with the new connection as parm.
  NOTE(review): the body is on lines missing from this listing; per the
  comment in front of this function an existing entry is unconditionally
  replaced by the new one - verify against the full source.
 */
2404 static void *add_killtcp_callback(void *parm, void *data)
2410 add a tcp socket to the list of connections we want to RST
/*
  Queue a TCP connection to be reset.

  The two endpoints (s and d, declared on lines missing from this listing)
  are canonicalized into src and dst.  The public address is looked up
  from dst, then from src, and ctdb->single_ip_vnn is considered when its
  address equals dst; if none applies an error is logged.

  The first connection for a vnn allocates the struct ctdb_kill_tcp (talloc
  child of ctdb, capture_fd = -1, empty connection tree, destructor
  ctdb_killtcp_destructor) and stores it in vnn->killtcp.  The connection
  itself is allocated as talloc child of killtcp and inserted into the tree
  under killtcp_key(&con->dst_addr, &con->src_addr), i.e. with the
  endpoints swapped, while capture_tcp_handler looks keys up in the
  (src, dst) order of a captured packet.

  If there is no capture socket yet it is opened on the interface of the
  vnn; if there is no fd event yet one is added for capture_tcp_handler
  with auto close, and a timer is armed to run
  ctdb_tickle_sentenced_connections after one second.

  The last two visible lines free vnn->killtcp and reset the pointer
  (presumably the failure path; the label is on a line missing here).

  NOTE(review): short source lines (braces, returns, NULL tests, the
  initialisation of con->count, the first tickle and the fd event flags)
  are missing from this listing.
 */
2412 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
2416 ctdb_sock_addr src, dst;
2417 struct ctdb_kill_tcp *killtcp;
2418 struct ctdb_killtcp_con *con;
2419 struct ctdb_vnn *vnn;
2421 ctdb_canonicalize_ip(s, &src);
2422 ctdb_canonicalize_ip(d, &dst);
2424 vnn = find_public_ip_vnn(ctdb, &dst);
2426 vnn = find_public_ip_vnn(ctdb, &src);
2429 /* if it is not a public ip it could be our 'single ip' */
2430 if (ctdb->single_ip_vnn) {
2431 if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2432 vnn = ctdb->single_ip_vnn;
2437 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n"));
2441 killtcp = vnn->killtcp;
2443 /* If this is the first connection to kill we must allocate
2446 if (killtcp == NULL) {
2447 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2448 CTDB_NO_MEMORY(ctdb, killtcp);
2451 killtcp->ctdb = ctdb;
2452 killtcp->capture_fd = -1;
2453 killtcp->connections = trbt_create(killtcp, 0);
2455 vnn->killtcp = killtcp;
2456 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2461 /* create a structure that describes this connection we want to
2462 RST and store it in killtcp->connections
2464 con = talloc(killtcp, struct ctdb_killtcp_con);
2465 CTDB_NO_MEMORY(ctdb, con);
2466 con->src_addr = src;
2467 con->dst_addr = dst;
2469 con->killtcp = killtcp;
2472 trbt_insertarray32_callback(killtcp->connections,
2473 KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2474 add_killtcp_callback, con);
2477 If we dont have a socket to listen on yet we must create it
2479 if (killtcp->capture_fd == -1) {
2480 const char *iface = ctdb_vnn_iface_string(vnn);
2481 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2482 if (killtcp->capture_fd == -1) {
2483 DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2484 "socket on iface '%s' for killtcp (%s)\n",
2485 iface, strerror(errno)));
2491 if (killtcp->fde == NULL) {
2492 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd,
2494 capture_tcp_handler, killtcp);
2495 tevent_fd_set_auto_close(killtcp->fde);
2497 /* We also need to set up some events to tickle all these connections
2498 until they are all reset
2500 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0),
2501 ctdb_tickle_sentenced_connections, killtcp);
2504 /* tickle him once now */
2513 talloc_free(vnn->killtcp);
2514 vnn->killtcp = NULL;
2519 kill a TCP connection.
/*
  Control handler: kill one TCP connection.
  indata.dptr is interpreted as a struct ctdb_control_killtcp and its two
  addresses are passed to ctdb_killtcp_add_connection(), whose result is
  returned unchanged.
  NOTE(review): no check of indata.dsize is visible - confirm the caller
  validates the payload size.
 */
2521 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2523 struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2525 return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2529 called by a daemon to inform us of the entire list of TCP tickles for
2530 a particular public address.
2531 this control should only be sent by the node that is currently serving
2532 that public address.
/*
  Control handler: replace the complete tickle list of a public address.

  indata must contain at least the fixed part of
  struct ctdb_control_tcp_tickle_list up to tickles.connections and then
  at least tickles.num connection records; both tests use "<", so extra
  trailing bytes are accepted.  Violations are logged at DEBUG_ERR level.

  The vnn is looked up from list->addr (a non-public address is logged at
  DEBUG_INFO level).  Any old vnn->tcp_array is freed, a new array is
  allocated with ctdb->nodes as talloc parent, the connections are copied
  from the request and the array is finally reparented onto the vnn with
  talloc_steal().

  NOTE(review): the size computation multiplies a sender supplied count by
  sizeof(struct ctdb_tcp_connection) - confirm it cannot wrap for the
  types involved.
  NOTE(review): if allocating connections fails, the new tcparray appears
  to stay attached to ctdb->nodes - depends on what CTDB_NO_MEMORY does,
  verify.
  NOTE(review): short source lines (braces, returns, the NULL test of vnn)
  are missing from this listing.
 */
2534 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2536 struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2537 struct ctdb_tcp_array *tcparray;
2538 struct ctdb_vnn *vnn;
2540 /* We must at least have tickles.num or else we cant verify the size
2541 of the received data blob
2543 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2544 tickles.connections)) {
2545 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2549 /* verify that the size of data matches what we expect */
2550 if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list,
2551 tickles.connections)
2552 + sizeof(struct ctdb_tcp_connection)
2553 * list->tickles.num) {
2554 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2558 vnn = find_public_ip_vnn(ctdb, &list->addr);
2560 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2561 ctdb_addr_to_str(&list->addr)));
2566 /* remove any old ticklelist we might have */
2567 talloc_free(vnn->tcp_array);
2568 vnn->tcp_array = NULL;
2570 tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2571 CTDB_NO_MEMORY(ctdb, tcparray);
2573 tcparray->num = list->tickles.num;
2575 tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2576 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2578 memcpy(tcparray->connections, &list->tickles.connections[0],
2579 sizeof(struct ctdb_tcp_connection)*tcparray->num);
2581 /* We now have a new fresh tickle list array for this vnn */
2582 vnn->tcp_array = talloc_steal(vnn, tcparray);
2588 called to return the full list of tickles for the public address associated
2589 with the provided vnn
/*
  Control handler: return the tickle list of a public address.

  indata.dptr is interpreted as a ctdb_sock_addr; an address that is not
  public is logged at DEBUG_ERR level.  The reply
  (struct ctdb_control_tcp_tickle_list) is allocated with outdata as talloc
  parent, sized for num connections, and receives tickles.num plus a copy
  of the connections of vnn->tcp_array.

  NOTE(review): vnn->tcp_array can be NULL elsewhere in this file; the
  guards around "num = tcparray->num" and the memcpy are on lines missing
  from this listing - verify they exist.
  NOTE(review): no check of indata.dsize is visible before addr is used.
  NOTE(review): short source lines (braces, returns, the assignment of the
  address in the reply) are missing from this listing.
 */
2591 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2593 ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2594 struct ctdb_control_tcp_tickle_list *list;
2595 struct ctdb_tcp_array *tcparray;
2597 struct ctdb_vnn *vnn;
2599 vnn = find_public_ip_vnn(ctdb, addr);
2601 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2602 ctdb_addr_to_str(addr)));
2607 tcparray = vnn->tcp_array;
2609 num = tcparray->num;
/* fixed header followed by num connection records */
2614 outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2615 tickles.connections)
2616 + sizeof(struct ctdb_tcp_connection) * num;
2618 outdata->dptr = talloc_size(outdata, outdata->dsize);
2619 CTDB_NO_MEMORY(ctdb, outdata->dptr);
2620 list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2623 list->tickles.num = num;
2625 memcpy(&list->tickles.connections[0], tcparray->connections,
2626 sizeof(struct ctdb_tcp_connection) * num);
2634 set the list of all tcp tickles for a public address
/*
  Send the complete tickle list of one public address to the other nodes.

  Builds a struct ctdb_control_tcp_tickle_list (temporary allocation with
  ctdb as talloc parent) holding num connections copied from tcparray and
  sends it with CTDB_CONTROL_SET_TCP_TICKLE_LIST and CTDB_CTRL_FLAG_NOREPLY
  to CTDB_BROADCAST_CONNECTED.  A send failure is logged at DEBUG_ERR
  level; the temporary buffer is freed in the visible lines after the
  send.

  NOTE(review): in the visible lines the control is always broadcast and
  neither timeout nor destnode is used - confirm against the full source.
  NOTE(review): the guards around "num = tcparray->num" and the memcpy, the
  assignment of the address in the payload and the returns are on lines
  missing from this listing.
 */
2636 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
2637 struct timeval timeout, uint32_t destnode,
2638 ctdb_sock_addr *addr,
2639 struct ctdb_tcp_array *tcparray)
2643 struct ctdb_control_tcp_tickle_list *list;
2646 num = tcparray->num;
2651 data.dsize = offsetof(struct ctdb_control_tcp_tickle_list,
2652 tickles.connections) +
2653 sizeof(struct ctdb_tcp_connection) * num;
2654 data.dptr = talloc_size(ctdb, data.dsize);
2655 CTDB_NO_MEMORY(ctdb, data.dptr);
2657 list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2659 list->tickles.num = num;
2661 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2664 ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
2665 CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2666 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2668 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2672 talloc_free(data.dptr);
2679 perform tickle updates if required
/*
  Periodic timed event; private_data is the struct ctdb_context.

  Walks all public addresses and tests whether the address belongs to this
  node (vnn->pnn against ctdb->pnn) and whether vnn->tcp_update_needed is
  set, then pushes the tickle list with ctdb_ctrl_set_tcp_tickles() using
  CTDB_BROADCAST_CONNECTED as destination.  A failure is logged at
  DEBUG_ERR level.  Finally the handler re-arms itself on
  ctdb->tickle_update_context after tunable.tickle_update_interval
  seconds.

  NOTE(review): short source lines (braces, continue statements, the
  timeout and tcp_array arguments, and any reset of tcp_update_needed) are
  missing from this listing.
 */
2681 static void ctdb_update_tcp_tickles(struct event_context *ev,
2682 struct timed_event *te,
2683 struct timeval t, void *private_data)
2685 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2687 struct ctdb_vnn *vnn;
2689 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2690 /* we only send out updates for public addresses that
2693 if (ctdb->pnn != vnn->pnn) {
2696 /* We only send out the updates if we need to */
2697 if (!vnn->tcp_update_needed) {
2700 ret = ctdb_ctrl_set_tcp_tickles(ctdb,
2702 CTDB_BROADCAST_CONNECTED,
2703 &vnn->public_address,
2706 DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2707 ctdb_addr_to_str(&vnn->public_address)));
2711 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2712 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2713 ctdb_update_tcp_tickles, ctdb);
2718 start periodic update of tcp tickles
/*
  Start the periodic tickle update.
  Creates ctdb->tickle_update_context as a new talloc context below ctdb
  and schedules ctdb_update_tcp_tickles on it after
  tunable.tickle_update_interval seconds.  The events hang off that
  context.
  NOTE(review): the result of talloc_new() is not checked in the visible
  lines.
 */
2720 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2722 ctdb->tickle_update_context = talloc_new(ctdb);
2724 event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2725 timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2726 ctdb_update_tcp_tickles, ctdb);
/*
  State of one series of gratuitous ARPs requested through
  ctdb_control_send_gratious_arp.
  Other code in this file also uses the members iface and count; their
  declarations are on lines missing from this listing.
 */
2732 struct control_gratious_arp {
2733 struct ctdb_context *ctdb;
/* address that is announced */
2734 ctdb_sock_addr addr;
2740 send a control_gratuitous arp
/*
  Timed event handler; private_data is a struct control_gratious_arp.

  Sends one gratuitous ARP for arp->addr on arp->iface with
  ctdb_sys_send_arp() and logs a failure at DEBUG_ERR level together with
  strerror(errno).  arp->count is compared with CTDB_ARP_REPEAT; the final
  call re-arms this handler CTDB_ARP_INTERVAL seconds later with arp as
  talloc parent of the event.

  NOTE(review): short source lines (braces, the update of arp->count and
  what happens when the repeat count is reached) are missing from this
  listing.
 */
2742 static void send_gratious_arp(struct event_context *ev, struct timed_event *te,
2743 struct timeval t, void *private_data)
2746 struct control_gratious_arp *arp = talloc_get_type(private_data,
2747 struct control_gratious_arp);
2749 ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2751 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2752 arp->iface, strerror(errno)));
2757 if (arp->count == CTDB_ARP_REPEAT) {
2762 event_add_timed(arp->ctdb->ev, arp,
2763 timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2764 send_gratious_arp, arp);
/*
  Control handler: start sending gratuitous ARPs for an address.

  indata must at least cover struct ctdb_control_gratious_arp up to its
  iface member; a second test compares indata.dsize with that offset plus
  gratious_arp->len (the comparison operator is on a line missing from
  this listing; the message says the size is wrong).  Both failures are
  logged at DEBUG_ERR level.

  A struct control_gratious_arp is allocated with ctdb as talloc parent,
  the address and a copy of the interface name are stored in it, and
  send_gratious_arp is scheduled to run immediately (timeval_zero()).

  NOTE(review): talloc_strdup() reads gratious_arp->iface as a C string;
  no check that it is NUL terminated within len bytes is visible.
  NOTE(review): if talloc_strdup() fails, arp appears to stay allocated
  under ctdb - depends on what CTDB_NO_MEMORY does, verify.
  NOTE(review): short source lines (braces, returns, the assignments of
  arp->ctdb and arp->count) are missing from this listing.
 */
2771 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2773 struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2774 struct control_gratious_arp *arp;
2776 /* verify the size of indata */
2777 if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2778 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
2779 (unsigned)indata.dsize,
2780 (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2784 ( offsetof(struct ctdb_control_gratious_arp, iface)
2785 + gratious_arp->len ) ){
2787 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2788 "but should be %u bytes\n",
2789 (unsigned)indata.dsize,
2790 (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2795 arp = talloc(ctdb, struct control_gratious_arp);
2796 CTDB_NO_MEMORY(ctdb, arp);
2799 arp->addr = gratious_arp->addr;
2800 arp->iface = talloc_strdup(arp, gratious_arp->iface);
2801 CTDB_NO_MEMORY(ctdb, arp->iface);
/* a zero timeout schedules the first ARP without delay */
2804 event_add_timed(arp->ctdb->ev, arp,
2805 timeval_zero(), send_gratious_arp, arp);
/*
 * Control handler: add a public address to this node.
 *
 * indata.dptr is interpreted as a struct ctdb_control_ip_iface. The buffer
 * must cover at least the fixed part of that structure (everything before
 * the iface member); a second check, whose log message names
 * "fixed part + pub->len" as the expected size, is only partly visible
 * here. The address, mask and interface name are then passed to
 * ctdb_add_public_address(), and a failure is logged at DEBUG_ERR.
 *
 * NOTE(review): the return statements and the test of ret are not visible
 * in this excerpt.
 * NOTE(review): nothing visible here verifies that pub->iface is
 * NUL-terminated within its len bytes before it is used as a string -
 * confirm.
 */
2810 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2812 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2815 /* verify the size of indata */
2816 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2817 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2821 ( offsetof(struct ctdb_control_ip_iface, iface)
2824 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2825 "but should be %u bytes\n",
2826 (unsigned)indata.dsize,
2827 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
/* hand the validated request to the function that performs the addition */
2831 ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2834 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2842 called when releaseip event finishes for del_public_address
/*
 * Completion callback used by ctdb_control_del_public_address().
 *
 * Its only visible action is to free private_data with talloc_free().
 * The caller passes its temporary mem_ctx in the argument slot right after
 * this callback, so presumably that context is what gets freed here -
 * confirm. The ctdb and status parameters are not referenced in the
 * visible lines.
 */
2844 static void delete_ip_callback(struct ctdb_context *ctdb, int status,
2847 talloc_free(private_data);
/*
 * Control handler: delete a public address from this node.
 *
 * indata.dptr is interpreted as a struct ctdb_control_ip_iface and is
 * size-checked in the same way as in ctdb_control_add_public_address().
 * The ctdb->vnn list is then searched for an entry whose public_address
 * matches pub->addr according to ctdb_same_ip(). A matching entry is
 * unlinked from the list; unless its iface pointer is NULL, a
 * CTDB_EVENT_RELEASE_IP event script is started through
 * ctdb_event_script_callback() with delete_ip_callback as the completion
 * callback, and the vnn's interface is unassigned with
 * ctdb_vnn_unassign_iface().
 *
 * NOTE(review): return statements, the body of the "vnn->iface == NULL"
 * branch and the handling of ret are not visible in this excerpt, so
 * return values and the final release of vnn are not documented here.
 * NOTE(review): the talloc_new() result is not checked in the visible
 * lines, and it is not visible whether mem_ctx is released when
 * ctdb_event_script_callback() fails - confirm.
 */
2850 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2852 struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2853 struct ctdb_vnn *vnn;
2856 /* verify the size of indata */
2857 if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2858 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2862 ( offsetof(struct ctdb_control_ip_iface, iface)
2865 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2866 "but should be %u bytes\n",
2867 (unsigned)indata.dsize,
2868 (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2872 /* walk over all public addresses until we find a match */
2873 for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2874 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2875 TALLOC_CTX *mem_ctx;
/* unlink first; vnn itself is still dereferenced below */
2877 DLIST_REMOVE(ctdb->vnn, vnn);
/* no interface recorded for this address */
2878 if (vnn->iface == NULL) {
/* temporary context: used for the script call and also given to delete_ip_callback */
2883 mem_ctx = talloc_new(ctdb);
2884 ret = ctdb_event_script_callback(ctdb,
2885 mem_ctx, delete_ip_callback, mem_ctx,
2887 CTDB_EVENT_RELEASE_IP,
2889 ctdb_vnn_iface_string(vnn),
2890 ctdb_addr_to_str(&vnn->public_address),
2891 vnn->public_netmask_bits);
/* runs after the iface string was taken for the script arguments above */
2892 ctdb_vnn_unassign_iface(ctdb, vnn);
2904 /* This function is called from the recovery daemon to verify that a remote
2905 node has the expected ip allocation.
2906 This is verified against ctdb->ip_tree
/*
 * Compare the public ip list reported by a node with ctdb->ip_tree.
 *
 * ips holds ips->num entries, each with an address and a pnn. When
 * ctdb->ip_tree is NULL the expected allocation is not known yet (see the
 * comment in that branch). Otherwise every entry is looked up in the tree
 * with trbt_lookuparray32(), keyed by ip_key() of its address:
 *   - an address that is not in the tree is logged at DEBUG_ERR;
 *   - entries where either the tree's pnn or the reported pnn equals -1
 *     get their own branch (body not visible here; presumably they are
 *     skipped - confirm);
 *   - a pnn that differs from the tree's is logged at DEBUG_ERR as an
 *     inconsistent allocation, printing the reported pnn first and the
 *     tree's pnn second.
 *
 * NOTE(review): the return statements are not visible in this excerpt, so
 * the return values are not documented here. pnn is printed with %u but
 * compared with -1; presumably -1 is an "unassigned" sentinel - confirm.
 */
2908 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2910 struct ctdb_public_ip_list *tmp_ip;
2913 if (ctdb->ip_tree == NULL) {
2914 /* dont know the expected allocation yet, assume remote node
2923 for (i=0; i<ips->num; i++) {
2924 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2925 if (tmp_ip == NULL) {
2926 DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2930 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2934 if (tmp_ip->pnn != ips->ips[i].pnn) {
2935 DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
/*
 * Record a new owner for one public address in ctdb->ip_tree.
 *
 * The address in ip->addr is looked up in the tree with
 * trbt_lookuparray32(), keyed by ip_key(). A missing tree or a missing
 * record is logged at DEBUG_ERR. When the record exists, the change is
 * logged at DEBUG_NOTICE (old pnn, then new pnn) and the record's pnn is
 * overwritten with ip->pnn.
 *
 * NOTE(review): the return statements and the end of the function are not
 * visible in this excerpt, so the return values are not documented here.
 */
2943 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2945 struct ctdb_public_ip_list *tmp_ip;
/* the tree has not been built yet, so there is nothing to update */
2947 if (ctdb->ip_tree == NULL) {
2948 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2952 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2953 if (tmp_ip == NULL) {
2954 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
/* log before assigning so the message can still show the previous owner */
2958 DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2959 tmp_ip->pnn = ip->pnn;