22dd878818dc7b901520aa5392c15e51213fe5e7
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/sys_rw.h"
34 #include "lib/util/util_process.h"
35
36 #include "ctdb_private.h"
37 #include "ctdb_client.h"
38
39 #include "common/rb_tree.h"
40 #include "common/reqid.h"
41 #include "common/system.h"
42 #include "common/common.h"
43 #include "common/logging.h"
44
45 #include "server/ipalloc.h"
46
/*
 * Timeout value derived from the takeover_timeout tunable.  The macro
 * expands in the caller's scope and therefore requires a variable
 * named "ctdb" to be visible there.
 */
#define TAKEOVER_TIMEOUT() \
	(timeval_current_ofs(ctdb->tunable.takeover_timeout, 0))

/*
 * Gratuitous ARP schedule used by ctdb_control_send_arp():
 * CTDB_ARP_INTERVAL is the first argument handed to
 * timeval_current_ofs() when re-arming the timer and CTDB_ARP_REPEAT
 * is the number of rounds sent before the ARP state is freed.
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
51
/* A local network interface, kept on the ctdb->ifaces list. */
struct ctdb_interface {
	/* doubly linked list linkage */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;

	/* interface name (talloc'd copy owned by this structure) */
	const char *name;

	/* link state; newly created entries start out as up */
	bool link_up;

	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
58
/* One entry in a VNN's list of interfaces that may host its address. */
struct vnn_interface {
	/* doubly linked list linkage */
	struct vnn_interface *prev;
	struct vnn_interface *next;

	/* the interface this entry refers to */
	struct ctdb_interface *iface;
};
63
64 /* state associated with a public ip address */
65 struct ctdb_vnn {
66         struct ctdb_vnn *prev, *next;
67
68         struct ctdb_interface *iface;
69         struct vnn_interface *ifaces;
70         ctdb_sock_addr public_address;
71         uint8_t public_netmask_bits;
72
73         /* the node number that is serving this public address, if any.
74            If no node serves this ip it is set to -1 */
75         int32_t pnn;
76
77         /* List of clients to tickle for this public address */
78         struct ctdb_tcp_array *tcp_array;
79
80         /* whether we need to update the other nodes with changes to our list
81            of connected clients */
82         bool tcp_update_needed;
83
84         /* a context to hang sending gratious arp events off */
85         TALLOC_CTX *takeover_ctx;
86
87         /* Set to true any time an update to this VNN is in flight.
88            This helps to avoid races. */
89         bool update_in_flight;
90
91         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
92          * address then this flag is set.  It will be deleted in the
93          * release IP callback. */
94         bool delete_pending;
95 };
96
97 static const char *iface_string(const struct ctdb_interface *iface)
98 {
99         return (iface != NULL ? iface->name : "__none__");
100 }
101
102 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
103 {
104         return iface_string(vnn->iface);
105 }
106
107 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
108                                               const char *iface);
109
110 static struct ctdb_interface *
111 ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
112 {
113         struct ctdb_interface *i;
114
115         if (strlen(iface) > CTDB_IFACE_SIZE) {
116                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
117                 return NULL;
118         }
119
120         /* Verify that we don't have an entry for this ip yet */
121         i = ctdb_find_iface(ctdb, iface);
122         if (i != NULL) {
123                 return i;
124         }
125
126         /* create a new structure for this interface */
127         i = talloc_zero(ctdb, struct ctdb_interface);
128         if (i == NULL) {
129                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
130                 return NULL;
131         }
132         i->name = talloc_strdup(i, iface);
133         if (i->name == NULL) {
134                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
135                 talloc_free(i);
136                 return NULL;
137         }
138
139         i->link_up = true;
140
141         DLIST_ADD(ctdb->ifaces, i);
142
143         return i;
144 }
145
146 static bool vnn_has_interface(struct ctdb_vnn *vnn,
147                               const struct ctdb_interface *iface)
148 {
149         struct vnn_interface *i;
150
151         for (i = vnn->ifaces; i != NULL; i = i->next) {
152                 if (iface == i->iface) {
153                         return true;
154                 }
155         }
156
157         return false;
158 }
159
160 /* If any interfaces now have no possible IPs then delete them.  This
161  * implementation is naive (i.e. simple) rather than clever
162  * (i.e. complex).  Given that this is run on delip and that operation
163  * is rare, this doesn't need to be efficient - it needs to be
164  * foolproof.  One alternative is reference counting, where the logic
165  * is distributed and can, therefore, be broken in multiple places.
166  * Another alternative is to build a red-black tree of interfaces that
167  * can have addresses (by walking ctdb->vnn once) and then walking
168  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
169  * one of those if the naive implementation causes problems...  :-)
170  */
171 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
172                                         struct ctdb_vnn *vnn)
173 {
174         struct ctdb_interface *i, *next;
175
176         /* For each interface, check if there's an IP using it. */
177         for (i = ctdb->ifaces; i != NULL; i = next) {
178                 struct ctdb_vnn *tv;
179                 bool found;
180                 next = i->next;
181
182                 /* Only consider interfaces named in the given VNN. */
183                 if (!vnn_has_interface(vnn, i)) {
184                         continue;
185                 }
186
187                 /* Search for a vnn with this interface. */
188                 found = false;
189                 for (tv=ctdb->vnn; tv; tv=tv->next) {
190                         if (vnn_has_interface(tv, i)) {
191                                 found = true;
192                                 break;
193                         }
194                 }
195
196                 if (!found) {
197                         /* None of the VNNs are using this interface. */
198                         DLIST_REMOVE(ctdb->ifaces, i);
199                         talloc_free(i);
200                 }
201         }
202 }
203
204
205 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
206                                               const char *iface)
207 {
208         struct ctdb_interface *i;
209
210         for (i=ctdb->ifaces;i;i=i->next) {
211                 if (strcmp(i->name, iface) == 0) {
212                         return i;
213                 }
214         }
215
216         return NULL;
217 }
218
219 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
220                                                   struct ctdb_vnn *vnn)
221 {
222         struct vnn_interface *i;
223         struct ctdb_interface *cur = NULL;
224         struct ctdb_interface *best = NULL;
225
226         for (i = vnn->ifaces; i != NULL; i = i->next) {
227
228                 cur = i->iface;
229
230                 if (!cur->link_up) {
231                         continue;
232                 }
233
234                 if (best == NULL) {
235                         best = cur;
236                         continue;
237                 }
238
239                 if (cur->references < best->references) {
240                         best = cur;
241                         continue;
242                 }
243         }
244
245         return best;
246 }
247
248 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
249                                      struct ctdb_vnn *vnn)
250 {
251         struct ctdb_interface *best = NULL;
252
253         if (vnn->iface) {
254                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
255                                    "still assigned to iface '%s'\n",
256                                    ctdb_addr_to_str(&vnn->public_address),
257                                    ctdb_vnn_iface_string(vnn)));
258                 return 0;
259         }
260
261         best = ctdb_vnn_best_iface(ctdb, vnn);
262         if (best == NULL) {
263                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
264                                   "cannot assign to iface any iface\n",
265                                   ctdb_addr_to_str(&vnn->public_address)));
266                 return -1;
267         }
268
269         vnn->iface = best;
270         best->references++;
271         vnn->pnn = ctdb->pnn;
272
273         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
274                            "now assigned to iface '%s' refs[%d]\n",
275                            ctdb_addr_to_str(&vnn->public_address),
276                            ctdb_vnn_iface_string(vnn),
277                            best->references));
278         return 0;
279 }
280
281 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
282                                     struct ctdb_vnn *vnn)
283 {
284         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
285                            "now unassigned (old iface '%s' refs[%d])\n",
286                            ctdb_addr_to_str(&vnn->public_address),
287                            ctdb_vnn_iface_string(vnn),
288                            vnn->iface?vnn->iface->references:0));
289         if (vnn->iface) {
290                 vnn->iface->references--;
291         }
292         vnn->iface = NULL;
293         if (vnn->pnn == ctdb->pnn) {
294                 vnn->pnn = -1;
295         }
296 }
297
298 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
299                                struct ctdb_vnn *vnn)
300 {
301         struct vnn_interface *i;
302
303         /* Nodes that are not RUNNING can not host IPs */
304         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
305                 return false;
306         }
307
308         if (vnn->delete_pending) {
309                 return false;
310         }
311
312         if (vnn->iface && vnn->iface->link_up) {
313                 return true;
314         }
315
316         for (i = vnn->ifaces; i != NULL; i = i->next) {
317                 if (i->iface->link_up) {
318                         return true;
319                 }
320         }
321
322         return false;
323 }
324
325 struct ctdb_takeover_arp {
326         struct ctdb_context *ctdb;
327         uint32_t count;
328         ctdb_sock_addr addr;
329         struct ctdb_tcp_array *tcparray;
330         struct ctdb_vnn *vnn;
331 };
332
333
334 /*
335   lists of tcp endpoints
336  */
337 struct ctdb_tcp_list {
338         struct ctdb_tcp_list *prev, *next;
339         struct ctdb_connection connection;
340 };
341
342 /*
343   list of clients to kill on IP release
344  */
345 struct ctdb_client_ip {
346         struct ctdb_client_ip *prev, *next;
347         struct ctdb_context *ctdb;
348         ctdb_sock_addr addr;
349         uint32_t client_id;
350 };
351
352
353 /*
354   send a gratuitous arp
355  */
356 static void ctdb_control_send_arp(struct tevent_context *ev,
357                                   struct tevent_timer *te,
358                                   struct timeval t, void *private_data)
359 {
360         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
361                                                         struct ctdb_takeover_arp);
362         int i, ret;
363         struct ctdb_tcp_array *tcparray;
364         const char *iface = ctdb_vnn_iface_string(arp->vnn);
365
366         ret = ctdb_sys_send_arp(&arp->addr, iface);
367         if (ret != 0) {
368                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
369                                   iface, strerror(errno)));
370         }
371
372         tcparray = arp->tcparray;
373         if (tcparray) {
374                 for (i=0;i<tcparray->num;i++) {
375                         struct ctdb_connection *tcon;
376
377                         tcon = &tcparray->connections[i];
378                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
379                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
380                                 ctdb_addr_to_str(&tcon->src),
381                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
382                         ret = ctdb_sys_send_tcp(
383                                 &tcon->src,
384                                 &tcon->dst,
385                                 0, 0, 0);
386                         if (ret != 0) {
387                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
388                                         ctdb_addr_to_str(&tcon->src)));
389                         }
390                 }
391         }
392
393         arp->count++;
394
395         if (arp->count == CTDB_ARP_REPEAT) {
396                 talloc_free(arp);
397                 return;
398         }
399
400         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
401                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
402                          ctdb_control_send_arp, arp);
403 }
404
405 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
406                                        struct ctdb_vnn *vnn)
407 {
408         struct ctdb_takeover_arp *arp;
409         struct ctdb_tcp_array *tcparray;
410
411         if (!vnn->takeover_ctx) {
412                 vnn->takeover_ctx = talloc_new(vnn);
413                 if (!vnn->takeover_ctx) {
414                         return -1;
415                 }
416         }
417
418         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
419         if (!arp) {
420                 return -1;
421         }
422
423         arp->ctdb = ctdb;
424         arp->addr = vnn->public_address;
425         arp->vnn  = vnn;
426
427         tcparray = vnn->tcp_array;
428         if (tcparray) {
429                 /* add all of the known tcp connections for this IP to the
430                    list of tcp connections to send tickle acks for */
431                 arp->tcparray = talloc_steal(arp, tcparray);
432
433                 vnn->tcp_array = NULL;
434                 vnn->tcp_update_needed = true;
435         }
436
437         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
438                          timeval_zero(), ctdb_control_send_arp, arp);
439
440         return 0;
441 }
442
/* State for an in-flight "takeip" event */
struct ctdb_do_takeip_state {
	/* control to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* VNN being taken over */
	struct ctdb_vnn *vnn;
};
447
448 /*
449   called when takeip event finishes
450  */
451 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
452                                     void *private_data)
453 {
454         struct ctdb_do_takeip_state *state =
455                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
456         int32_t ret;
457         TDB_DATA data;
458
459         if (status != 0) {
460                 if (status == -ETIME) {
461                         ctdb_ban_self(ctdb);
462                 }
463                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
464                                  ctdb_addr_to_str(&state->vnn->public_address),
465                                  ctdb_vnn_iface_string(state->vnn)));
466                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
467
468                 talloc_free(state);
469                 return;
470         }
471
472         if (ctdb->do_checkpublicip) {
473
474         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
475         if (ret != 0) {
476                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
477                 talloc_free(state);
478                 return;
479         }
480
481         }
482
483         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
484         data.dsize = strlen((char *)data.dptr) + 1;
485         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
486
487         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
488
489
490         /* the control succeeded */
491         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
492         talloc_free(state);
493         return;
494 }
495
496 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
497 {
498         state->vnn->update_in_flight = false;
499         return 0;
500 }
501
502 /*
503   take over an ip address
504  */
505 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
506                               struct ctdb_req_control_old *c,
507                               struct ctdb_vnn *vnn)
508 {
509         int ret;
510         struct ctdb_do_takeip_state *state;
511
512         if (vnn->update_in_flight) {
513                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
514                                     "update for this IP already in flight\n",
515                                     ctdb_addr_to_str(&vnn->public_address),
516                                     vnn->public_netmask_bits));
517                 return -1;
518         }
519
520         ret = ctdb_vnn_assign_iface(ctdb, vnn);
521         if (ret != 0) {
522                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
523                                  "assign a usable interface\n",
524                                  ctdb_addr_to_str(&vnn->public_address),
525                                  vnn->public_netmask_bits));
526                 return -1;
527         }
528
529         state = talloc(vnn, struct ctdb_do_takeip_state);
530         CTDB_NO_MEMORY(ctdb, state);
531
532         state->c = NULL;
533         state->vnn   = vnn;
534
535         vnn->update_in_flight = true;
536         talloc_set_destructor(state, ctdb_takeip_destructor);
537
538         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
539                             ctdb_addr_to_str(&vnn->public_address),
540                             vnn->public_netmask_bits,
541                             ctdb_vnn_iface_string(vnn)));
542
543         ret = ctdb_event_script_callback(ctdb,
544                                          state,
545                                          ctdb_do_takeip_callback,
546                                          state,
547                                          CTDB_EVENT_TAKE_IP,
548                                          "%s %s %u",
549                                          ctdb_vnn_iface_string(vnn),
550                                          ctdb_addr_to_str(&vnn->public_address),
551                                          vnn->public_netmask_bits);
552
553         if (ret != 0) {
554                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
555                         ctdb_addr_to_str(&vnn->public_address),
556                         ctdb_vnn_iface_string(vnn)));
557                 talloc_free(state);
558                 return -1;
559         }
560
561         state->c = talloc_steal(ctdb, c);
562         return 0;
563 }
564
/* State for an in-flight "updateip" event */
struct ctdb_do_updateip_state {
	/* control to reply to once the event has finished */
	struct ctdb_req_control_old *c;

	/* interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;

	/* VNN being moved */
	struct ctdb_vnn *vnn;
};
570
571 /*
572   called when updateip event finishes
573  */
574 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
575                                       void *private_data)
576 {
577         struct ctdb_do_updateip_state *state =
578                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
579
580         if (status != 0) {
581                 if (status == -ETIME) {
582                         ctdb_ban_self(ctdb);
583                 }
584                 DEBUG(DEBUG_ERR,
585                       ("Failed update of IP %s from interface %s to %s\n",
586                        ctdb_addr_to_str(&state->vnn->public_address),
587                        iface_string(state->old),
588                        ctdb_vnn_iface_string(state->vnn)));
589
590                 /*
591                  * All we can do is reset the old interface
592                  * and let the next run fix it
593                  */
594                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
595                 state->vnn->iface = state->old;
596                 state->vnn->iface->references++;
597
598                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
599                 talloc_free(state);
600                 return;
601         }
602
603         /* the control succeeded */
604         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
605         talloc_free(state);
606         return;
607 }
608
609 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
610 {
611         state->vnn->update_in_flight = false;
612         return 0;
613 }
614
615 /*
616   update (move) an ip address
617  */
618 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
619                                 struct ctdb_req_control_old *c,
620                                 struct ctdb_vnn *vnn)
621 {
622         int ret;
623         struct ctdb_do_updateip_state *state;
624         struct ctdb_interface *old = vnn->iface;
625         const char *old_name = iface_string(old);
626         const char *new_name;
627
628         if (vnn->update_in_flight) {
629                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
630                                     "update for this IP already in flight\n",
631                                     ctdb_addr_to_str(&vnn->public_address),
632                                     vnn->public_netmask_bits));
633                 return -1;
634         }
635
636         ctdb_vnn_unassign_iface(ctdb, vnn);
637         ret = ctdb_vnn_assign_iface(ctdb, vnn);
638         if (ret != 0) {
639                 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
640                                  "assign a usable interface (old iface '%s')\n",
641                                  ctdb_addr_to_str(&vnn->public_address),
642                                  vnn->public_netmask_bits,
643                                  old_name));
644                 return -1;
645         }
646
647         if (old == vnn->iface) {
648                 /* A benign update from one interface onto itself.
649                  * no need to run the eventscripts in this case, just return
650                  * success.
651                  */
652                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
653                 return 0;
654         }
655
656         state = talloc(vnn, struct ctdb_do_updateip_state);
657         CTDB_NO_MEMORY(ctdb, state);
658
659         state->c = NULL;
660         state->old = old;
661         state->vnn = vnn;
662
663         vnn->update_in_flight = true;
664         talloc_set_destructor(state, ctdb_updateip_destructor);
665
666         new_name = ctdb_vnn_iface_string(vnn);
667         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
668                             "interface %s to %s\n",
669                             ctdb_addr_to_str(&vnn->public_address),
670                             vnn->public_netmask_bits,
671                             old_name,
672                             new_name));
673
674         ret = ctdb_event_script_callback(ctdb,
675                                          state,
676                                          ctdb_do_updateip_callback,
677                                          state,
678                                          CTDB_EVENT_UPDATE_IP,
679                                          "%s %s %s %u",
680                                          old_name,
681                                          new_name,
682                                          ctdb_addr_to_str(&vnn->public_address),
683                                          vnn->public_netmask_bits);
684         if (ret != 0) {
685                 DEBUG(DEBUG_ERR,
686                       ("Failed update IP %s from interface %s to %s\n",
687                        ctdb_addr_to_str(&vnn->public_address),
688                        old_name, new_name));
689                 talloc_free(state);
690                 return -1;
691         }
692
693         state->c = talloc_steal(ctdb, c);
694         return 0;
695 }
696
697 /*
698   Find the vnn of the node that has a public ip address
699   returns -1 if the address is not known as a public address
700  */
701 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
702 {
703         struct ctdb_vnn *vnn;
704
705         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
706                 if (ctdb_same_ip(&vnn->public_address, addr)) {
707                         return vnn;
708                 }
709         }
710
711         return NULL;
712 }
713
714 /*
715   take over an ip address
716  */
717 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
718                                  struct ctdb_req_control_old *c,
719                                  TDB_DATA indata,
720                                  bool *async_reply)
721 {
722         int ret;
723         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
724         struct ctdb_vnn *vnn;
725         bool have_ip = false;
726         bool do_updateip = false;
727         bool do_takeip = false;
728         struct ctdb_interface *best_iface = NULL;
729
730         if (pip->pnn != ctdb->pnn) {
731                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
732                                  "with pnn %d, but we're node %d\n",
733                                  ctdb_addr_to_str(&pip->addr),
734                                  pip->pnn, ctdb->pnn));
735                 return -1;
736         }
737
738         /* update out vnn list */
739         vnn = find_public_ip_vnn(ctdb, &pip->addr);
740         if (vnn == NULL) {
741                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
742                         ctdb_addr_to_str(&pip->addr)));
743                 return 0;
744         }
745
746         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
747                 have_ip = ctdb_sys_have_ip(&pip->addr);
748         }
749         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
750         if (best_iface == NULL) {
751                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
752                                  "a usable interface (old %s, have_ip %d)\n",
753                                  ctdb_addr_to_str(&vnn->public_address),
754                                  vnn->public_netmask_bits,
755                                  ctdb_vnn_iface_string(vnn),
756                                  have_ip));
757                 return -1;
758         }
759
760         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
761                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762                                   "and we have it on iface[%s], but it was assigned to node %d"
763                                   "and we are node %d, banning ourself\n",
764                                  ctdb_addr_to_str(&vnn->public_address),
765                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
766                 ctdb_ban_self(ctdb);
767                 return -1;
768         }
769
770         if (vnn->pnn == -1 && have_ip) {
771                 /* This will cause connections to be reset and
772                  * reestablished.  However, this is a very unusual
773                  * situation and doing this will completely repair the
774                  * inconsistency in the VNN.
775                  */
776                 DEBUG(DEBUG_WARNING,
777                       (__location__
778                        " Doing updateip for IP %s already on an interface\n",
779                        ctdb_addr_to_str(&vnn->public_address)));
780                 do_updateip = true;
781         }
782
783         if (vnn->iface) {
784                 if (vnn->iface != best_iface) {
785                         if (!vnn->iface->link_up) {
786                                 do_updateip = true;
787                         } else if (vnn->iface->references > (best_iface->references + 1)) {
788                                 /* only move when the rebalance gains something */
789                                         do_updateip = true;
790                         }
791                 }
792         }
793
794         if (!have_ip) {
795                 if (do_updateip) {
796                         ctdb_vnn_unassign_iface(ctdb, vnn);
797                         do_updateip = false;
798                 }
799                 do_takeip = true;
800         }
801
802         if (do_takeip) {
803                 ret = ctdb_do_takeip(ctdb, c, vnn);
804                 if (ret != 0) {
805                         return -1;
806                 }
807         } else if (do_updateip) {
808                 ret = ctdb_do_updateip(ctdb, c, vnn);
809                 if (ret != 0) {
810                         return -1;
811                 }
812         } else {
813                 /*
814                  * The interface is up and the kernel known the ip
815                  * => do nothing
816                  */
817                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
818                         ctdb_addr_to_str(&pip->addr),
819                         vnn->public_netmask_bits,
820                         ctdb_vnn_iface_string(vnn)));
821                 return 0;
822         }
823
824         /* tell ctdb_control.c that we will be replying asynchronously */
825         *async_reply = true;
826
827         return 0;
828 }
829
830 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
831 {
832         DLIST_REMOVE(ctdb->vnn, vnn);
833         ctdb_vnn_unassign_iface(ctdb, vnn);
834         ctdb_remove_orphaned_ifaces(ctdb, vnn);
835         talloc_free(vnn);
836 }
837
838 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
839                                         struct ctdb_vnn *vnn,
840                                         ctdb_sock_addr *addr)
841 {
842         TDB_DATA data;
843
844         /* Send a message to all clients of this node telling them
845          * that the cluster has been reconfigured and they should
846          * close any connections on this IP address
847          */
848         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
849         data.dsize = strlen((char *)data.dptr)+1;
850         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
851         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
852
853         ctdb_vnn_unassign_iface(ctdb, vnn);
854
855         /* Process the IP if it has been marked for deletion */
856         if (vnn->delete_pending) {
857                 do_delete_ip(ctdb, vnn);
858                 return NULL;
859         }
860
861         return vnn;
862 }
863
864 struct release_ip_callback_state {
865         struct ctdb_req_control_old *c;
866         ctdb_sock_addr *addr;
867         struct ctdb_vnn *vnn;
868         uint32_t target_pnn;
869 };
870
871 /*
872   called when releaseip event finishes
873  */
874 static void release_ip_callback(struct ctdb_context *ctdb, int status,
875                                 void *private_data)
876 {
877         struct release_ip_callback_state *state =
878                 talloc_get_type(private_data, struct release_ip_callback_state);
879
880         if (status == -ETIME) {
881                 ctdb_ban_self(ctdb);
882         }
883
884         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
885                 if  (ctdb_sys_have_ip(state->addr)) {
886                         DEBUG(DEBUG_ERR,
887                               ("IP %s still hosted during release IP callback, failing\n",
888                                ctdb_addr_to_str(state->addr)));
889                         ctdb_request_control_reply(ctdb, state->c,
890                                                    NULL, -1, NULL);
891                         talloc_free(state);
892                         return;
893                 }
894         }
895
896         state->vnn->pnn = state->target_pnn;
897         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
898
899         /* the control succeeded */
900         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
901         talloc_free(state);
902 }
903
904 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
905 {
906         if (state->vnn != NULL) {
907                 state->vnn->update_in_flight = false;
908         }
909         return 0;
910 }
911
912 /*
913   release an ip address
914  */
915 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
916                                 struct ctdb_req_control_old *c,
917                                 TDB_DATA indata, 
918                                 bool *async_reply)
919 {
920         int ret;
921         struct release_ip_callback_state *state;
922         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
923         struct ctdb_vnn *vnn;
924         const char *iface;
925
926         /* update our vnn list */
927         vnn = find_public_ip_vnn(ctdb, &pip->addr);
928         if (vnn == NULL) {
929                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
930                         ctdb_addr_to_str(&pip->addr)));
931                 return 0;
932         }
933
934         /* stop any previous arps */
935         talloc_free(vnn->takeover_ctx);
936         vnn->takeover_ctx = NULL;
937
938         /* RELEASE_IP controls are sent to all nodes that should not
939          * be hosting a particular IP.  This serves 2 purposes.  The
940          * first is to help resolve any inconsistencies.  If a node
941          * does unexpectly host an IP then it will be released.  The
942          * 2nd is to use a "redundant release" to tell non-takeover
943          * nodes where an IP is moving to.  This is how "ctdb ip" can
944          * report the (likely) location of an IP by only asking the
945          * local node.  Redundant releases need to update the PNN but
946          * are otherwise ignored.
947          */
948         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
949                 if (!ctdb_sys_have_ip(&pip->addr)) {
950                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
951                                 ctdb_addr_to_str(&pip->addr),
952                                 vnn->public_netmask_bits,
953                                 ctdb_vnn_iface_string(vnn)));
954                         vnn->pnn = pip->pnn;
955                         ctdb_vnn_unassign_iface(ctdb, vnn);
956                         return 0;
957                 }
958         } else {
959                 if (vnn->iface == NULL) {
960                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
961                                            ctdb_addr_to_str(&pip->addr),
962                                            vnn->public_netmask_bits));
963                         vnn->pnn = pip->pnn;
964                         return 0;
965                 }
966         }
967
968         /* There is a potential race between take_ip and us because we
969          * update the VNN via a callback that run when the
970          * eventscripts have been run.  Avoid the race by allowing one
971          * update to be in flight at a time.
972          */
973         if (vnn->update_in_flight) {
974                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
975                                     "update for this IP already in flight\n",
976                                     ctdb_addr_to_str(&vnn->public_address),
977                                     vnn->public_netmask_bits));
978                 return -1;
979         }
980
981         iface = ctdb_vnn_iface_string(vnn);
982
983         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
984                 ctdb_addr_to_str(&pip->addr),
985                 vnn->public_netmask_bits,
986                 iface,
987                 pip->pnn));
988
989         state = talloc(ctdb, struct release_ip_callback_state);
990         if (state == NULL) {
991                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
992                                __FILE__, __LINE__);
993                 return -1;
994         }
995
996         state->c = NULL;
997         state->addr = talloc(state, ctdb_sock_addr);
998         if (state->addr == NULL) {
999                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1000                                __FILE__, __LINE__);
1001                 talloc_free(state);
1002                 return -1;
1003         }
1004         *state->addr = pip->addr;
1005         state->target_pnn = pip->pnn;
1006         state->vnn   = vnn;
1007
1008         vnn->update_in_flight = true;
1009         talloc_set_destructor(state, ctdb_releaseip_destructor);
1010
1011         ret = ctdb_event_script_callback(ctdb, 
1012                                          state, release_ip_callback, state,
1013                                          CTDB_EVENT_RELEASE_IP,
1014                                          "%s %s %u",
1015                                          iface,
1016                                          ctdb_addr_to_str(&pip->addr),
1017                                          vnn->public_netmask_bits);
1018         if (ret != 0) {
1019                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1020                         ctdb_addr_to_str(&pip->addr),
1021                         ctdb_vnn_iface_string(vnn)));
1022                 talloc_free(state);
1023                 return -1;
1024         }
1025
1026         /* tell the control that we will be reply asynchronously */
1027         *async_reply = true;
1028         state->c = talloc_steal(state, c);
1029         return 0;
1030 }
1031
1032 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1033                                    ctdb_sock_addr *addr,
1034                                    unsigned mask, const char *ifaces,
1035                                    bool check_address)
1036 {
1037         struct ctdb_vnn      *vnn;
1038         char *tmp;
1039         const char *iface;
1040
1041         /* Verify that we don't have an entry for this IP yet */
1042         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1043                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1044                         DEBUG(DEBUG_ERR,
1045                               ("Duplicate public IP address '%s'\n",
1046                                ctdb_addr_to_str(addr)));
1047                         return -1;
1048                 }
1049         }
1050
1051         /* Create a new VNN structure for this IP address */
1052         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1053         if (vnn == NULL) {
1054                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1055                 return -1;
1056         }
1057         tmp = talloc_strdup(vnn, ifaces);
1058         if (tmp == NULL) {
1059                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1060                 talloc_free(vnn);
1061                 return -1;
1062         }
1063         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1064                 struct vnn_interface *vnn_iface;
1065                 struct ctdb_interface *i;
1066                 if (!ctdb_sys_check_iface_exists(iface)) {
1067                         DEBUG(DEBUG_ERR,
1068                               ("Unknown interface %s for public address %s\n",
1069                                iface, ctdb_addr_to_str(addr)));
1070                         talloc_free(vnn);
1071                         return -1;
1072                 }
1073
1074                 i = ctdb_add_local_iface(ctdb, iface);
1075                 if (i == NULL) {
1076                         DEBUG(DEBUG_ERR,
1077                               ("Failed to add interface '%s' "
1078                                "for public address %s\n",
1079                                iface, ctdb_addr_to_str(addr)));
1080                         talloc_free(vnn);
1081                         return -1;
1082                 }
1083
1084                 vnn_iface = talloc_zero(vnn, struct vnn_interface);
1085                 if (vnn_iface == NULL) {
1086                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1087                         talloc_free(vnn);
1088                         return -1;
1089                 }
1090
1091                 vnn_iface->iface = i;
1092                 DLIST_ADD_END(vnn->ifaces, vnn_iface);
1093         }
1094         talloc_free(tmp);
1095         vnn->public_address      = *addr;
1096         vnn->public_netmask_bits = mask;
1097         vnn->pnn                 = -1;
1098
1099         DLIST_ADD(ctdb->vnn, vnn);
1100
1101         return 0;
1102 }
1103
1104 /*
1105   setup the public address lists from a file
1106 */
1107 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1108 {
1109         char **lines;
1110         int nlines;
1111         int i;
1112
1113         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1114         if (lines == NULL) {
1115                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1116                 return -1;
1117         }
1118         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1119                 nlines--;
1120         }
1121
1122         for (i=0;i<nlines;i++) {
1123                 unsigned mask;
1124                 ctdb_sock_addr addr;
1125                 const char *addrstr;
1126                 const char *ifaces;
1127                 char *tok, *line;
1128
1129                 line = lines[i];
1130                 while ((*line == ' ') || (*line == '\t')) {
1131                         line++;
1132                 }
1133                 if (*line == '#') {
1134                         continue;
1135                 }
1136                 if (strcmp(line, "") == 0) {
1137                         continue;
1138                 }
1139                 tok = strtok(line, " \t");
1140                 addrstr = tok;
1141                 tok = strtok(NULL, " \t");
1142                 if (tok == NULL) {
1143                         if (NULL == ctdb->default_public_interface) {
1144                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1145                                          i+1));
1146                                 talloc_free(lines);
1147                                 return -1;
1148                         }
1149                         ifaces = ctdb->default_public_interface;
1150                 } else {
1151                         ifaces = tok;
1152                 }
1153
1154                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1155                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1156                         talloc_free(lines);
1157                         return -1;
1158                 }
1159                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1160                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1161                         talloc_free(lines);
1162                         return -1;
1163                 }
1164         }
1165
1166
1167         talloc_free(lines);
1168         return 0;
1169 }
1170
1171 /*
1172   destroy a ctdb_client_ip structure
1173  */
1174 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1175 {
1176         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1177                 ctdb_addr_to_str(&ip->addr),
1178                 ntohs(ip->addr.ip.sin_port),
1179                 ip->client_id));
1180
1181         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1182         return 0;
1183 }
1184
1185 /*
1186   called by a client to inform us of a TCP connection that it is managing
1187   that should tickled with an ACK when IP takeover is done
1188  */
1189 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1190                                 TDB_DATA indata)
1191 {
1192         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1193         struct ctdb_connection *tcp_sock = NULL;
1194         struct ctdb_tcp_list *tcp;
1195         struct ctdb_connection t;
1196         int ret;
1197         TDB_DATA data;
1198         struct ctdb_client_ip *ip;
1199         struct ctdb_vnn *vnn;
1200         ctdb_sock_addr addr;
1201
1202         /* If we don't have public IPs, tickles are useless */
1203         if (ctdb->vnn == NULL) {
1204                 return 0;
1205         }
1206
1207         tcp_sock = (struct ctdb_connection *)indata.dptr;
1208
1209         addr = tcp_sock->src;
1210         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1211         addr = tcp_sock->dst;
1212         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1213
1214         ZERO_STRUCT(addr);
1215         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1216         vnn = find_public_ip_vnn(ctdb, &addr);
1217         if (vnn == NULL) {
1218                 switch (addr.sa.sa_family) {
1219                 case AF_INET:
1220                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1221                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1222                                         ctdb_addr_to_str(&addr)));
1223                         }
1224                         break;
1225                 case AF_INET6:
1226                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1227                                 ctdb_addr_to_str(&addr)));
1228                         break;
1229                 default:
1230                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1231                 }
1232
1233                 return 0;
1234         }
1235
1236         if (vnn->pnn != ctdb->pnn) {
1237                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1238                         ctdb_addr_to_str(&addr),
1239                         client_id, client->pid));
1240                 /* failing this call will tell smbd to die */
1241                 return -1;
1242         }
1243
1244         ip = talloc(client, struct ctdb_client_ip);
1245         CTDB_NO_MEMORY(ctdb, ip);
1246
1247         ip->ctdb      = ctdb;
1248         ip->addr      = addr;
1249         ip->client_id = client_id;
1250         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1251         DLIST_ADD(ctdb->client_ip_list, ip);
1252
1253         tcp = talloc(client, struct ctdb_tcp_list);
1254         CTDB_NO_MEMORY(ctdb, tcp);
1255
1256         tcp->connection.src = tcp_sock->src;
1257         tcp->connection.dst = tcp_sock->dst;
1258
1259         DLIST_ADD(client->tcp_list, tcp);
1260
1261         t.src = tcp_sock->src;
1262         t.dst = tcp_sock->dst;
1263
1264         data.dptr = (uint8_t *)&t;
1265         data.dsize = sizeof(t);
1266
1267         switch (addr.sa.sa_family) {
1268         case AF_INET:
1269                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1270                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1271                         ctdb_addr_to_str(&tcp_sock->src),
1272                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1273                 break;
1274         case AF_INET6:
1275                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1276                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1277                         ctdb_addr_to_str(&tcp_sock->src),
1278                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1279                 break;
1280         default:
1281                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1282         }
1283
1284
1285         /* tell all nodes about this tcp connection */
1286         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1287                                        CTDB_CONTROL_TCP_ADD,
1288                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1289         if (ret != 0) {
1290                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1291                 return -1;
1292         }
1293
1294         return 0;
1295 }
1296
1297 /*
1298   find a tcp address on a list
1299  */
1300 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1301                                            struct ctdb_connection *tcp)
1302 {
1303         int i;
1304
1305         if (array == NULL) {
1306                 return NULL;
1307         }
1308
1309         for (i=0;i<array->num;i++) {
1310                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1311                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1312                         return &array->connections[i];
1313                 }
1314         }
1315         return NULL;
1316 }
1317
1318
1319
1320 /*
1321   called by a daemon to inform us of a TCP connection that one of its
1322   clients managing that should tickled with an ACK when IP takeover is
1323   done
1324  */
1325 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1326 {
1327         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1328         struct ctdb_tcp_array *tcparray;
1329         struct ctdb_connection tcp;
1330         struct ctdb_vnn *vnn;
1331
1332         /* If we don't have public IPs, tickles are useless */
1333         if (ctdb->vnn == NULL) {
1334                 return 0;
1335         }
1336
1337         vnn = find_public_ip_vnn(ctdb, &p->dst);
1338         if (vnn == NULL) {
1339                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1340                         ctdb_addr_to_str(&p->dst)));
1341
1342                 return -1;
1343         }
1344
1345
1346         tcparray = vnn->tcp_array;
1347
1348         /* If this is the first tickle */
1349         if (tcparray == NULL) {
1350                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1351                 CTDB_NO_MEMORY(ctdb, tcparray);
1352                 vnn->tcp_array = tcparray;
1353
1354                 tcparray->num = 0;
1355                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1356                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1357
1358                 tcparray->connections[tcparray->num].src = p->src;
1359                 tcparray->connections[tcparray->num].dst = p->dst;
1360                 tcparray->num++;
1361
1362                 if (tcp_update_needed) {
1363                         vnn->tcp_update_needed = true;
1364                 }
1365                 return 0;
1366         }
1367
1368
1369         /* Do we already have this tickle ?*/
1370         tcp.src = p->src;
1371         tcp.dst = p->dst;
1372         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1373                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1374                         ctdb_addr_to_str(&tcp.dst),
1375                         ntohs(tcp.dst.ip.sin_port),
1376                         vnn->pnn));
1377                 return 0;
1378         }
1379
1380         /* A new tickle, we must add it to the array */
1381         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1382                                         struct ctdb_connection,
1383                                         tcparray->num+1);
1384         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1385
1386         tcparray->connections[tcparray->num].src = p->src;
1387         tcparray->connections[tcparray->num].dst = p->dst;
1388         tcparray->num++;
1389
1390         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1391                 ctdb_addr_to_str(&tcp.dst),
1392                 ntohs(tcp.dst.ip.sin_port),
1393                 vnn->pnn));
1394
1395         if (tcp_update_needed) {
1396                 vnn->tcp_update_needed = true;
1397         }
1398
1399         return 0;
1400 }
1401
1402
1403 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1404 {
1405         struct ctdb_connection *tcpp;
1406
1407         if (vnn == NULL) {
1408                 return;
1409         }
1410
1411         /* if the array is empty we cant remove it
1412            and we don't need to do anything
1413          */
1414         if (vnn->tcp_array == NULL) {
1415                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist (array is empty) %s:%u\n",
1416                         ctdb_addr_to_str(&conn->dst),
1417                         ntohs(conn->dst.ip.sin_port)));
1418                 return;
1419         }
1420
1421
1422         /* See if we know this connection
1423            if we don't know this connection  then we dont need to do anything
1424          */
1425         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1426         if (tcpp == NULL) {
1427                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesn't exist %s:%u\n",
1428                         ctdb_addr_to_str(&conn->dst),
1429                         ntohs(conn->dst.ip.sin_port)));
1430                 return;
1431         }
1432
1433
1434         /* We need to remove this entry from the array.
1435            Instead of allocating a new array and copying data to it
1436            we cheat and just copy the last entry in the existing array
1437            to the entry that is to be removed and just shring the 
1438            ->num field
1439          */
1440         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1441         vnn->tcp_array->num--;
1442
1443         /* If we deleted the last entry we also need to remove the entire array
1444          */
1445         if (vnn->tcp_array->num == 0) {
1446                 talloc_free(vnn->tcp_array);
1447                 vnn->tcp_array = NULL;
1448         }               
1449
1450         vnn->tcp_update_needed = true;
1451
1452         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1453                 ctdb_addr_to_str(&conn->src),
1454                 ntohs(conn->src.ip.sin_port)));
1455 }
1456
1457
1458 /*
1459   called by a daemon to inform us of a TCP connection that one of its
1460   clients used are no longer needed in the tickle database
1461  */
1462 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1463 {
1464         struct ctdb_vnn *vnn;
1465         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
1466
1467         /* If we don't have public IPs, tickles are useless */
1468         if (ctdb->vnn == NULL) {
1469                 return 0;
1470         }
1471
1472         vnn = find_public_ip_vnn(ctdb, &conn->dst);
1473         if (vnn == NULL) {
1474                 DEBUG(DEBUG_ERR,
1475                       (__location__ " unable to find public address %s\n",
1476                        ctdb_addr_to_str(&conn->dst)));
1477                 return 0;
1478         }
1479
1480         ctdb_remove_connection(vnn, conn);
1481
1482         return 0;
1483 }
1484
1485
1486 /*
1487   Called when another daemon starts - causes all tickles for all
1488   public addresses we are serving to be sent to the new node on the
1489   next check.  This actually causes the next scheduled call to
1490   tdb_update_tcp_tickles() to update all nodes.  This is simple and
1491   doesn't require careful error handling.
1492  */
1493 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
1494 {
1495         struct ctdb_vnn *vnn;
1496
1497         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
1498                            (unsigned long) pnn));
1499
1500         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1501                 vnn->tcp_update_needed = true;
1502         }
1503
1504         return 0;
1505 }
1506
1507
1508 /*
1509   called when a client structure goes away - hook to remove
1510   elements from the tcp_list in all daemons
1511  */
1512 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1513 {
1514         while (client->tcp_list) {
1515                 struct ctdb_vnn *vnn;
1516                 struct ctdb_tcp_list *tcp = client->tcp_list;
1517                 struct ctdb_connection *conn = &tcp->connection;
1518
1519                 DLIST_REMOVE(client->tcp_list, tcp);
1520
1521                 vnn = find_public_ip_vnn(client->ctdb,
1522                                          &conn->dst);
1523                 if (vnn == NULL) {
1524                         DEBUG(DEBUG_ERR,
1525                               (__location__ " unable to find public address %s\n",
1526                                ctdb_addr_to_str(&conn->dst)));
1527                         continue;
1528                 }
1529
1530                 /* If the IP address is hosted on this node then
1531                  * remove the connection. */
1532                 if (vnn->pnn == client->ctdb->pnn) {
1533                         ctdb_remove_connection(vnn, conn);
1534                 }
1535
1536                 /* Otherwise this function has been called because the
1537                  * server IP address has been released to another node
1538                  * and the client has exited.  This means that we
1539                  * should not delete the connection information.  The
1540                  * takeover node processes connections too. */
1541         }
1542 }
1543
1544
1545 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1546 {
1547         struct ctdb_vnn *vnn, *next;
1548         int count = 0;
1549
1550         if (ctdb->tunable.disable_ip_failover == 1) {
1551                 return;
1552         }
1553
1554         for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
1555                 /* vnn can be freed below in release_ip_post() */
1556                 next = vnn->next;
1557
1558                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1559                         ctdb_vnn_unassign_iface(ctdb, vnn);
1560                         continue;
1561                 }
1562
1563                 /* Don't allow multiple releases at once.  Some code,
1564                  * particularly ctdb_tickle_sentenced_connections() is
1565                  * not re-entrant */
1566                 if (vnn->update_in_flight) {
1567                         DEBUG(DEBUG_WARNING,
1568                               (__location__
1569                                " Not releasing IP %s/%u on interface %s, an update is already in progress\n",
1570                                     ctdb_addr_to_str(&vnn->public_address),
1571                                     vnn->public_netmask_bits,
1572                                     ctdb_vnn_iface_string(vnn)));
1573                         continue;
1574                 }
1575                 vnn->update_in_flight = true;
1576
1577                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
1578                                     ctdb_addr_to_str(&vnn->public_address),
1579                                     vnn->public_netmask_bits,
1580                                     ctdb_vnn_iface_string(vnn)));
1581
1582                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1583                                        ctdb_vnn_iface_string(vnn),
1584                                        ctdb_addr_to_str(&vnn->public_address),
1585                                        vnn->public_netmask_bits);
1586                 /* releaseip timeouts are converted to success, so to
1587                  * detect failures just check if the IP address is
1588                  * still there...
1589                  */
1590                 if (ctdb_sys_have_ip(&vnn->public_address)) {
1591                         DEBUG(DEBUG_ERR,
1592                               (__location__
1593                                " IP address %s not released\n",
1594                                ctdb_addr_to_str(&vnn->public_address)));
1595                         vnn->update_in_flight = false;
1596                         continue;
1597                 }
1598
1599                 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
1600                 if (vnn != NULL) {
1601                         vnn->update_in_flight = false;
1602                 }
1603                 count++;
1604         }
1605
1606         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
1607 }
1608
1609
1610 /*
1611   get list of public IPs
1612  */
1613 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1614                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
1615 {
1616         int i, num, len;
1617         struct ctdb_public_ip_list_old *ips;
1618         struct ctdb_vnn *vnn;
1619         bool only_available = false;
1620
1621         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1622                 only_available = true;
1623         }
1624
1625         /* count how many public ip structures we have */
1626         num = 0;
1627         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1628                 num++;
1629         }
1630
1631         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1632                 num*sizeof(struct ctdb_public_ip);
1633         ips = talloc_zero_size(outdata, len);
1634         CTDB_NO_MEMORY(ctdb, ips);
1635
1636         i = 0;
1637         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1638                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1639                         continue;
1640                 }
1641                 ips->ips[i].pnn  = vnn->pnn;
1642                 ips->ips[i].addr = vnn->public_address;
1643                 i++;
1644         }
1645         ips->num = i;
1646         len = offsetof(struct ctdb_public_ip_list_old, ips) +
1647                 i*sizeof(struct ctdb_public_ip);
1648
1649         outdata->dsize = len;
1650         outdata->dptr  = (uint8_t *)ips;
1651
1652         return 0;
1653 }
1654
1655
1656 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1657                                         struct ctdb_req_control_old *c,
1658                                         TDB_DATA indata,
1659                                         TDB_DATA *outdata)
1660 {
1661         int i, num, len;
1662         ctdb_sock_addr *addr;
1663         struct ctdb_public_ip_info_old *info;
1664         struct ctdb_vnn *vnn;
1665         struct vnn_interface *iface;
1666
1667         addr = (ctdb_sock_addr *)indata.dptr;
1668
1669         vnn = find_public_ip_vnn(ctdb, addr);
1670         if (vnn == NULL) {
1671                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1672                                  "'%s'not a public address\n",
1673                                  ctdb_addr_to_str(addr)));
1674                 return -1;
1675         }
1676
1677         /* count how many public ip structures we have */
1678         num = 0;
1679         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1680                 num++;
1681         }
1682
1683         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1684                 num*sizeof(struct ctdb_iface);
1685         info = talloc_zero_size(outdata, len);
1686         CTDB_NO_MEMORY(ctdb, info);
1687
1688         info->ip.addr = vnn->public_address;
1689         info->ip.pnn = vnn->pnn;
1690         info->active_idx = 0xFFFFFFFF;
1691
1692         i = 0;
1693         for (iface = vnn->ifaces; iface != NULL; iface = iface->next) {
1694                 struct ctdb_interface *cur;
1695
1696                 cur = iface->iface;
1697                 if (vnn->iface == cur) {
1698                         info->active_idx = i;
1699                 }
1700                 strncpy(info->ifaces[i].name, cur->name,
1701                         sizeof(info->ifaces[i].name));
1702                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
1703                 info->ifaces[i].link_state = cur->link_up;
1704                 info->ifaces[i].references = cur->references;
1705
1706                 i++;
1707         }
1708         info->num = i;
1709         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
1710                 i*sizeof(struct ctdb_iface);
1711
1712         outdata->dsize = len;
1713         outdata->dptr  = (uint8_t *)info;
1714
1715         return 0;
1716 }
1717
1718 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1719                                 struct ctdb_req_control_old *c,
1720                                 TDB_DATA *outdata)
1721 {
1722         int i, num, len;
1723         struct ctdb_iface_list_old *ifaces;
1724         struct ctdb_interface *cur;
1725
1726         /* count how many public ip structures we have */
1727         num = 0;
1728         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1729                 num++;
1730         }
1731
1732         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1733                 num*sizeof(struct ctdb_iface);
1734         ifaces = talloc_zero_size(outdata, len);
1735         CTDB_NO_MEMORY(ctdb, ifaces);
1736
1737         i = 0;
1738         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1739                 strncpy(ifaces->ifaces[i].name, cur->name,
1740                         sizeof(ifaces->ifaces[i].name));
1741                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
1742                 ifaces->ifaces[i].link_state = cur->link_up;
1743                 ifaces->ifaces[i].references = cur->references;
1744                 i++;
1745         }
1746         ifaces->num = i;
1747         len = offsetof(struct ctdb_iface_list_old, ifaces) +
1748                 i*sizeof(struct ctdb_iface);
1749
1750         outdata->dsize = len;
1751         outdata->dptr  = (uint8_t *)ifaces;
1752
1753         return 0;
1754 }
1755
1756 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
1757                                     struct ctdb_req_control_old *c,
1758                                     TDB_DATA indata)
1759 {
1760         struct ctdb_iface *info;
1761         struct ctdb_interface *iface;
1762         bool link_up = false;
1763
1764         info = (struct ctdb_iface *)indata.dptr;
1765
1766         if (info->name[CTDB_IFACE_SIZE] != '\0') {
1767                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
1768                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
1769                                   len, len, info->name));
1770                 return -1;
1771         }
1772
1773         switch (info->link_state) {
1774         case 0:
1775                 link_up = false;
1776                 break;
1777         case 1:
1778                 link_up = true;
1779                 break;
1780         default:
1781                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
1782                                   (unsigned int)info->link_state));
1783                 return -1;
1784         }
1785
1786         if (info->references != 0) {
1787                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
1788                                   (unsigned int)info->references));
1789                 return -1;
1790         }
1791
1792         iface = ctdb_find_iface(ctdb, info->name);
1793         if (iface == NULL) {
1794                 return -1;
1795         }
1796
1797         if (link_up == iface->link_up) {
1798                 return 0;
1799         }
1800
1801         DEBUG(DEBUG_ERR,
1802               ("iface[%s] has changed it's link status %s => %s\n",
1803                iface->name,
1804                iface->link_up?"up":"down",
1805                link_up?"up":"down"));
1806
1807         iface->link_up = link_up;
1808         return 0;
1809 }
1810
1811
1812 /*
1813   called by a daemon to inform us of the entire list of TCP tickles for
1814   a particular public address.
1815   this control should only be sent by the node that is currently serving
1816   that public address.
1817  */
1818 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1819 {
1820         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
1821         struct ctdb_tcp_array *tcparray;
1822         struct ctdb_vnn *vnn;
1823
1824         /* We must at least have tickles.num or else we cant verify the size
1825            of the received data blob
1826          */
1827         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
1828                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
1829                 return -1;
1830         }
1831
1832         /* verify that the size of data matches what we expect */
1833         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
1834                          + sizeof(struct ctdb_connection) * list->num) {
1835                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
1836                 return -1;
1837         }
1838
1839         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
1840                            ctdb_addr_to_str(&list->addr)));
1841
1842         vnn = find_public_ip_vnn(ctdb, &list->addr);
1843         if (vnn == NULL) {
1844                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
1845                         ctdb_addr_to_str(&list->addr)));
1846
1847                 return 1;
1848         }
1849
1850         if (vnn->pnn == ctdb->pnn) {
1851                 DEBUG(DEBUG_INFO,
1852                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
1853                        ctdb_addr_to_str(&list->addr)));
1854                 return 0;
1855         }
1856
1857         /* remove any old ticklelist we might have */
1858         talloc_free(vnn->tcp_array);
1859         vnn->tcp_array = NULL;
1860
1861         tcparray = talloc(vnn, struct ctdb_tcp_array);
1862         CTDB_NO_MEMORY(ctdb, tcparray);
1863
1864         tcparray->num = list->num;
1865
1866         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
1867         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1868
1869         memcpy(tcparray->connections, &list->connections[0],
1870                sizeof(struct ctdb_connection)*tcparray->num);
1871
1872         /* We now have a new fresh tickle list array for this vnn */
1873         vnn->tcp_array = tcparray;
1874
1875         return 0;
1876 }
1877
1878 /*
1879   called to return the full list of tickles for the puclic address associated 
1880   with the provided vnn
1881  */
1882 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1883 {
1884         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1885         struct ctdb_tickle_list_old *list;
1886         struct ctdb_tcp_array *tcparray;
1887         int num, i;
1888         struct ctdb_vnn *vnn;
1889         unsigned port;
1890
1891         vnn = find_public_ip_vnn(ctdb, addr);
1892         if (vnn == NULL) {
1893                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
1894                         ctdb_addr_to_str(addr)));
1895
1896                 return 1;
1897         }
1898
1899         port = ctdb_addr_to_port(addr);
1900
1901         tcparray = vnn->tcp_array;
1902         num = 0;
1903         if (tcparray != NULL) {
1904                 if (port == 0) {
1905                         /* All connections */
1906                         num = tcparray->num;
1907                 } else {
1908                         /* Count connections for port */
1909                         for (i = 0; i < tcparray->num; i++) {
1910                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1911                                         num++;
1912                                 }
1913                         }
1914                 }
1915         }
1916
1917         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
1918                         + sizeof(struct ctdb_connection) * num;
1919
1920         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1921         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1922         list = (struct ctdb_tickle_list_old *)outdata->dptr;
1923
1924         list->addr = *addr;
1925         list->num = num;
1926
1927         if (num == 0) {
1928                 return 0;
1929         }
1930
1931         num = 0;
1932         for (i = 0; i < tcparray->num; i++) {
1933                 if (port == 0 || \
1934                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
1935                         list->connections[num] = tcparray->connections[i];
1936                         num++;
1937                 }
1938         }
1939
1940         return 0;
1941 }
1942
1943
1944 /*
1945   set the list of all tcp tickles for a public address
1946  */
1947 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
1948                                             ctdb_sock_addr *addr,
1949                                             struct ctdb_tcp_array *tcparray)
1950 {
1951         int ret, num;
1952         TDB_DATA data;
1953         struct ctdb_tickle_list_old *list;
1954
1955         if (tcparray) {
1956                 num = tcparray->num;
1957         } else {
1958                 num = 0;
1959         }
1960
1961         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
1962                         sizeof(struct ctdb_connection) * num;
1963         data.dptr = talloc_size(ctdb, data.dsize);
1964         CTDB_NO_MEMORY(ctdb, data.dptr);
1965
1966         list = (struct ctdb_tickle_list_old *)data.dptr;
1967         list->addr = *addr;
1968         list->num = num;
1969         if (tcparray) {
1970                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
1971         }
1972
1973         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
1974                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1975                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1976         if (ret != 0) {
1977                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1978                 return -1;
1979         }
1980
1981         talloc_free(data.dptr);
1982
1983         return ret;
1984 }
1985
1986 static void ctdb_send_set_tcp_tickles_for_all(struct ctdb_context *ctdb,
1987                                               bool force)
1988 {
1989         struct ctdb_vnn *vnn;
1990         int ret;
1991
1992         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
1993                 /* we only send out updates for public addresses that
1994                    we have taken over
1995                  */
1996                 if (ctdb->pnn != vnn->pnn) {
1997                         continue;
1998                 }
1999
2000                 /* We only send out the updates if we need to */
2001                 if (!force && !vnn->tcp_update_needed) {
2002                         continue;
2003                 }
2004
2005                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2006                                                        &vnn->public_address,
2007                                                        vnn->tcp_array);
2008                 if (ret != 0) {
2009                         D_ERR("Failed to send the tickle update for ip %s\n",
2010                               ctdb_addr_to_str(&vnn->public_address));
2011                         vnn->tcp_update_needed = true;
2012                 } else {
2013                         D_INFO("Sent tickle update for ip %s\n",
2014                                ctdb_addr_to_str(&vnn->public_address));
2015                         vnn->tcp_update_needed = false;
2016                 }
2017         }
2018
2019 }
2020
2021 /*
2022   perform tickle updates if required
2023  */
2024 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2025                                     struct tevent_timer *te,
2026                                     struct timeval t, void *private_data)
2027 {
2028         struct ctdb_context *ctdb = talloc_get_type(
2029                 private_data, struct ctdb_context);
2030
2031         ctdb_send_set_tcp_tickles_for_all(ctdb, false);
2032
2033         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2034                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2035                          ctdb_update_tcp_tickles, ctdb);
2036 }
2037
2038 /*
2039   start periodic update of tcp tickles
2040  */
2041 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2042 {
2043         ctdb->tickle_update_context = talloc_new(ctdb);
2044
2045         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2046                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2047                          ctdb_update_tcp_tickles, ctdb);
2048 }
2049
2050
2051
2052
2053 struct control_gratious_arp {
2054         struct ctdb_context *ctdb;
2055         ctdb_sock_addr addr;
2056         const char *iface;
2057         int count;
2058 };
2059
2060 /*
2061   send a control_gratuitous arp
2062  */
2063 static void send_gratious_arp(struct tevent_context *ev,
2064                               struct tevent_timer *te,
2065                               struct timeval t, void *private_data)
2066 {
2067         int ret;
2068         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2069                                                         struct control_gratious_arp);
2070
2071         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2072         if (ret != 0) {
2073                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2074                                  arp->iface, strerror(errno)));
2075         }
2076
2077
2078         arp->count++;
2079         if (arp->count == CTDB_ARP_REPEAT) {
2080                 talloc_free(arp);
2081                 return;
2082         }
2083
2084         tevent_add_timer(arp->ctdb->ev, arp,
2085                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2086                          send_gratious_arp, arp);
2087 }
2088
2089
2090 /*
2091   send a gratious arp 
2092  */
2093 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2094 {
2095         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2096         struct control_gratious_arp *arp;
2097
2098         /* verify the size of indata */
2099         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2100                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2101                                  (unsigned)indata.dsize, 
2102                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2103                 return -1;
2104         }
2105         if (indata.dsize != 
2106                 ( offsetof(struct ctdb_addr_info_old, iface)
2107                 + gratious_arp->len ) ){
2108
2109                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2110                         "but should be %u bytes\n", 
2111                          (unsigned)indata.dsize, 
2112                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2113                 return -1;
2114         }
2115
2116
2117         arp = talloc(ctdb, struct control_gratious_arp);
2118         CTDB_NO_MEMORY(ctdb, arp);
2119
2120         arp->ctdb  = ctdb;
2121         arp->addr   = gratious_arp->addr;
2122         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2123         CTDB_NO_MEMORY(ctdb, arp->iface);
2124         arp->count = 0;
2125
2126         tevent_add_timer(arp->ctdb->ev, arp,
2127                          timeval_zero(), send_gratious_arp, arp);
2128
2129         return 0;
2130 }
2131
2132 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2133 {
2134         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2135         int ret;
2136
2137         /* verify the size of indata */
2138         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2139                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2140                 return -1;
2141         }
2142         if (indata.dsize != 
2143                 ( offsetof(struct ctdb_addr_info_old, iface)
2144                 + pub->len ) ){
2145
2146                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2147                         "but should be %u bytes\n", 
2148                          (unsigned)indata.dsize, 
2149                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2150                 return -1;
2151         }
2152
2153         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2154
2155         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2156
2157         if (ret != 0) {
2158                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2159                 return -1;
2160         }
2161
2162         return 0;
2163 }
2164
2165 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2166 {
2167         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2168         struct ctdb_vnn *vnn;
2169
2170         /* verify the size of indata */
2171         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2172                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2173                 return -1;
2174         }
2175         if (indata.dsize != 
2176                 ( offsetof(struct ctdb_addr_info_old, iface)
2177                 + pub->len ) ){
2178
2179                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2180                         "but should be %u bytes\n", 
2181                          (unsigned)indata.dsize, 
2182                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2183                 return -1;
2184         }
2185
2186         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2187
2188         /* walk over all public addresses until we find a match */
2189         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2190                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2191                         if (vnn->pnn == ctdb->pnn) {
2192                                 /* This IP is currently being hosted.
2193                                  * Defer the deletion until the next
2194                                  * takeover run. "ctdb reloadips" will
2195                                  * always cause a takeover run.  "ctdb
2196                                  * delip" will now need an explicit
2197                                  * "ctdb ipreallocated" afterwards. */
2198                                 vnn->delete_pending = true;
2199                         } else {
2200                                 /* This IP is not hosted on the
2201                                  * current node so just delete it
2202                                  * now. */
2203                                 do_delete_ip(ctdb, vnn);
2204                         }
2205
2206                         return 0;
2207                 }
2208         }
2209
2210         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2211                          ctdb_addr_to_str(&pub->addr)));
2212         return -1;
2213 }
2214
2215
/*
  State carried from ctdb_control_ipreallocated() to
  ctdb_ipreallocated_callback().
 */
struct ipreallocated_callback_state {
        /* the request to answer once the event script has finished */
        struct ctdb_req_control_old *c;
};
2219
2220 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2221                                         int status, void *p)
2222 {
2223         struct ipreallocated_callback_state *state =
2224                 talloc_get_type(p, struct ipreallocated_callback_state);
2225
2226         if (status != 0) {
2227                 DEBUG(DEBUG_ERR,
2228                       (" \"ipreallocated\" event script failed (status %d)\n",
2229                        status));
2230                 if (status == -ETIME) {
2231                         ctdb_ban_self(ctdb);
2232                 }
2233         }
2234
2235         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2236         talloc_free(state);
2237 }
2238
2239 /* A control to run the ipreallocated event */
2240 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2241                                    struct ctdb_req_control_old *c,
2242                                    bool *async_reply)
2243 {
2244         int ret;
2245         struct ipreallocated_callback_state *state;
2246
2247         state = talloc(ctdb, struct ipreallocated_callback_state);
2248         CTDB_NO_MEMORY(ctdb, state);
2249
2250         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2251
2252         ret = ctdb_event_script_callback(ctdb, state,
2253                                          ctdb_ipreallocated_callback, state,
2254                                          CTDB_EVENT_IPREALLOCATED,
2255                                          "%s", "");
2256
2257         if (ret != 0) {
2258                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2259                 talloc_free(state);
2260                 return -1;
2261         }
2262
2263         /* tell the control that we will be reply asynchronously */
2264         state->c    = talloc_steal(state, c);
2265         *async_reply = true;
2266
2267         return 0;
2268 }
2269
2270
/*
  Bookkeeping for one "reload public IPs" operation carried out by a
  child process.
 */
struct ctdb_reloadips_handle {
        /* daemon context */
        struct ctdb_context *ctdb;
        /* request answered from the destructor, if still set */
        struct ctdb_req_control_old *c;
        /* status sent in that reply */
        int status;
        /* fd[0] is read for the child's one-byte result;
         * presumably a pipe pair - confirm where it is created */
        int fd[2];
        /* pid that the destructor kills */
        pid_t child;
        /* presumably the fd event watching fd[0] - confirm at setup */
        struct tevent_fd *fde;
};
2279
2280 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2281 {
2282         if (h == h->ctdb->reload_ips) {
2283                 h->ctdb->reload_ips = NULL;
2284         }
2285         if (h->c != NULL) {
2286                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2287                 h->c = NULL;
2288         }
2289         ctdb_kill(h->ctdb, h->child, SIGKILL);
2290         return 0;
2291 }
2292
2293 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2294                                          struct tevent_timer *te,
2295                                          struct timeval t, void *private_data)
2296 {
2297         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2298
2299         talloc_free(h);
2300 }
2301
2302 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2303                                          struct tevent_fd *fde,
2304                                          uint16_t flags, void *private_data)
2305 {
2306         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2307
2308         char res;
2309         int ret;
2310
2311         ret = sys_read(h->fd[0], &res, 1);
2312         if (ret < 1 || res != 0) {
2313                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2314                 res = 1;
2315         }
2316         h->status = res;
2317
2318         talloc_free(h);
2319 }
2320
2321 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2322 {
2323         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2324         struct ctdb_public_ip_list_old *ips;
2325         struct ctdb_vnn *vnn;
2326         struct client_async_data *async_data;
2327         struct timeval timeout;
2328         TDB_DATA data;
2329         struct ctdb_client_control_state *state;
2330         bool first_add;
2331         int i, ret;
2332
2333         CTDB_NO_MEMORY(ctdb, mem_ctx);
2334
2335         /* Read IPs from local node */
2336         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2337                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2338         if (ret != 0) {
2339                 DEBUG(DEBUG_ERR,
2340                       ("Unable to fetch public IPs from local node\n"));
2341                 talloc_free(mem_ctx);
2342                 return -1;
2343         }
2344
2345         /* Read IPs file - this is safe since this is a child process */
2346         ctdb->vnn = NULL;
2347         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2348                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2349                 talloc_free(mem_ctx);
2350                 return -1;
2351         }
2352
2353         async_data = talloc_zero(mem_ctx, struct client_async_data);
2354         CTDB_NO_MEMORY(ctdb, async_data);
2355
2356         /* Compare IPs between node and file for IPs to be deleted */
2357         for (i = 0; i < ips->num; i++) {
2358                 /* */
2359                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2360                         if (ctdb_same_ip(&vnn->public_address,
2361                                          &ips->ips[i].addr)) {
2362                                 /* IP is still in file */
2363                                 break;
2364                         }
2365                 }
2366
2367                 if (vnn == NULL) {
2368                         /* Delete IP ips->ips[i] */
2369                         struct ctdb_addr_info_old *pub;
2370
2371                         DEBUG(DEBUG_NOTICE,
2372                               ("IP %s no longer configured, deleting it\n",
2373                                ctdb_addr_to_str(&ips->ips[i].addr)));
2374
2375                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2376                         CTDB_NO_MEMORY(ctdb, pub);
2377
2378                         pub->addr  = ips->ips[i].addr;
2379                         pub->mask  = 0;
2380                         pub->len   = 0;
2381
2382                         timeout = TAKEOVER_TIMEOUT();
2383
2384                         data.dsize = offsetof(struct ctdb_addr_info_old,
2385                                               iface) + pub->len;
2386                         data.dptr = (uint8_t *)pub;
2387
2388                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2389                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2390                                                   0, data, async_data,
2391                                                   &timeout, NULL);
2392                         if (state == NULL) {
2393                                 DEBUG(DEBUG_ERR,
2394                                       (__location__
2395                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2396                                 goto failed;
2397                         }
2398
2399                         ctdb_client_async_add(async_data, state);
2400                 }
2401         }
2402
2403         /* Compare IPs between node and file for IPs to be added */
2404         first_add = true;
2405         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2406                 for (i = 0; i < ips->num; i++) {
2407                         if (ctdb_same_ip(&vnn->public_address,
2408                                          &ips->ips[i].addr)) {
2409                                 /* IP already on node */
2410                                 break;
2411                         }
2412                 }
2413                 if (i == ips->num) {
2414                         /* Add IP ips->ips[i] */
2415                         struct ctdb_addr_info_old *pub;
2416                         const char *ifaces = NULL;
2417                         uint32_t len;
2418                         struct vnn_interface *iface = NULL;
2419
2420                         DEBUG(DEBUG_NOTICE,
2421                               ("New IP %s configured, adding it\n",
2422                                ctdb_addr_to_str(&vnn->public_address)));
2423                         if (first_add) {
2424                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2425
2426                                 data.dsize = sizeof(pnn);
2427                                 data.dptr  = (uint8_t *)&pnn;
2428
2429                                 ret = ctdb_client_send_message(
2430                                         ctdb,
2431                                         CTDB_BROADCAST_CONNECTED,
2432                                         CTDB_SRVID_REBALANCE_NODE,
2433                                         data);
2434                                 if (ret != 0) {
2435                                         DEBUG(DEBUG_WARNING,
2436                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2437                                 }
2438
2439                                 first_add = false;
2440                         }
2441
2442                         ifaces = vnn->ifaces->iface->name;
2443                         iface = vnn->ifaces->next;
2444                         while (iface != NULL) {
2445                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
2446                                                          iface->iface->name);
2447                                 iface = iface->next;
2448                         }
2449
2450                         len   = strlen(ifaces) + 1;
2451                         pub = talloc_zero_size(mem_ctx,
2452                                                offsetof(struct ctdb_addr_info_old, iface) + len);
2453                         CTDB_NO_MEMORY(ctdb, pub);
2454
2455                         pub->addr  = vnn->public_address;
2456                         pub->mask  = vnn->public_netmask_bits;
2457                         pub->len   = len;
2458                         memcpy(&pub->iface[0], ifaces, pub->len);
2459
2460                         timeout = TAKEOVER_TIMEOUT();
2461
2462                         data.dsize = offsetof(struct ctdb_addr_info_old,
2463                                               iface) + pub->len;
2464                         data.dptr = (uint8_t *)pub;
2465
2466                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2467                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
2468                                                   0, data, async_data,
2469                                                   &timeout, NULL);
2470                         if (state == NULL) {
2471                                 DEBUG(DEBUG_ERR,
2472                                       (__location__
2473                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
2474                                 goto failed;
2475                         }
2476
2477                         ctdb_client_async_add(async_data, state);
2478                 }
2479         }
2480
2481         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2482                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
2483                 goto failed;
2484         }
2485
2486         talloc_free(mem_ctx);
2487         return 0;
2488
2489 failed:
2490         talloc_free(mem_ctx);
2491         return -1;
2492 }
2493
2494 /* This control is sent to force the node to re-read the public addresses file
2495    and drop any addresses we should nnot longer host, and add new addresses
2496    that we are now able to host
2497 */
2498 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
2499 {
2500         struct ctdb_reloadips_handle *h;
2501         pid_t parent = getpid();
2502
2503         if (ctdb->reload_ips != NULL) {
2504                 talloc_free(ctdb->reload_ips);
2505                 ctdb->reload_ips = NULL;
2506         }
2507
2508         h = talloc(ctdb, struct ctdb_reloadips_handle);
2509         CTDB_NO_MEMORY(ctdb, h);
2510         h->ctdb     = ctdb;
2511         h->c        = NULL;
2512         h->status   = -1;
2513         
2514         if (pipe(h->fd) == -1) {
2515                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
2516                 talloc_free(h);
2517                 return -1;
2518         }
2519
2520         h->child = ctdb_fork(ctdb);
2521         if (h->child == (pid_t)-1) {
2522                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
2523                 close(h->fd[0]);
2524                 close(h->fd[1]);
2525                 talloc_free(h);
2526                 return -1;
2527         }
2528
2529         /* child process */
2530         if (h->child == 0) {
2531                 signed char res = 0;
2532
2533                 close(h->fd[0]);
2534
2535                 prctl_set_comment("ctdb_reloadips");
2536                 if (switch_from_server_to_client(ctdb) != 0) {
2537                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
2538                         res = -1;
2539                 } else {
2540                         res = ctdb_reloadips_child(ctdb);
2541                         if (res != 0) {
2542                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
2543                         }
2544                 }
2545
2546                 sys_write(h->fd[1], &res, 1);
2547                 ctdb_wait_for_process_to_exit(parent);
2548                 _exit(0);
2549         }
2550
2551         h->c             = talloc_steal(h, c);
2552
2553         close(h->fd[1]);
2554         set_close_on_exec(h->fd[0]);
2555
2556         talloc_set_destructor(h, ctdb_reloadips_destructor);
2557
2558
2559         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
2560                                ctdb_reloadips_child_handler, (void *)h);
2561         tevent_fd_set_auto_close(h->fde);
2562
2563         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
2564                          ctdb_reloadips_timeout_event, h);
2565
2566         /* we reply later */
2567         *async_reply = true;
2568         return 0;
2569 }