ctdb-daemon: Use release_ip_post() when releasing all IP addresses
[samba.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/* Timeout built from the takeover_timeout tunable via
 * timeval_current_ofs().  Expands to an expression that uses a variable
 * named "ctdb", which must be in scope at the point of use. */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* CTDB_ARP_INTERVAL is the first argument of timeval_current_ofs() when
 * the ARP timer is rescheduled; CTDB_ARP_REPEAT is the number of rounds
 * sent before the ARP state is freed. */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
50
/* A local network interface, kept on the ctdb->ifaces list */
struct ctdb_interface {
	/* List linkage */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;

	/* Interface name; used as the lookup key */
	const char *name;

	/* Set to true when the entry is created; interfaces with
	 * link_up false are not picked for new assignments */
	bool link_up;

	/* Number of VNNs currently assigned to this interface */
	uint32_t references;
};
57
58 /* state associated with a public ip address */
59 struct ctdb_vnn {
60         struct ctdb_vnn *prev, *next;
61
62         struct ctdb_interface *iface;
63         const char **ifaces;
64         ctdb_sock_addr public_address;
65         uint8_t public_netmask_bits;
66
67         /* the node number that is serving this public address, if any.
68            If no node serves this ip it is set to -1 */
69         int32_t pnn;
70
71         /* List of clients to tickle for this public address */
72         struct ctdb_tcp_array *tcp_array;
73
74         /* whether we need to update the other nodes with changes to our list
75            of connected clients */
76         bool tcp_update_needed;
77
78         /* a context to hang sending gratious arp events off */
79         TALLOC_CTX *takeover_ctx;
80
81         /* Set to true any time an update to this VNN is in flight.
82            This helps to avoid races. */
83         bool update_in_flight;
84
85         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86          * address then this flag is set.  It will be deleted in the
87          * release IP callback. */
88         bool delete_pending;
89 };
90
91 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
92 {
93         if (vnn->iface) {
94                 return vnn->iface->name;
95         }
96
97         return "__none__";
98 }
99
100 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
101 {
102         struct ctdb_interface *i;
103
104         if (strlen(iface) > CTDB_IFACE_SIZE) {
105                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
106                 return -1;
107         }
108
109         /* Verify that we don't have an entry for this ip yet */
110         for (i=ctdb->ifaces;i;i=i->next) {
111                 if (strcmp(i->name, iface) == 0) {
112                         return 0;
113                 }
114         }
115
116         /* create a new structure for this interface */
117         i = talloc_zero(ctdb, struct ctdb_interface);
118         CTDB_NO_MEMORY_FATAL(ctdb, i);
119         i->name = talloc_strdup(i, iface);
120         CTDB_NO_MEMORY(ctdb, i->name);
121
122         i->link_up = true;
123
124         DLIST_ADD(ctdb->ifaces, i);
125
126         return 0;
127 }
128
129 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
130                                         const char *name)
131 {
132         int n;
133
134         for (n = 0; vnn->ifaces[n] != NULL; n++) {
135                 if (strcmp(name, vnn->ifaces[n]) == 0) {
136                         return true;
137                 }
138         }
139
140         return false;
141 }
142
143 /* If any interfaces now have no possible IPs then delete them.  This
144  * implementation is naive (i.e. simple) rather than clever
145  * (i.e. complex).  Given that this is run on delip and that operation
146  * is rare, this doesn't need to be efficient - it needs to be
147  * foolproof.  One alternative is reference counting, where the logic
148  * is distributed and can, therefore, be broken in multiple places.
149  * Another alternative is to build a red-black tree of interfaces that
150  * can have addresses (by walking ctdb->vnn once) and then walking
151  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
152  * one of those if the naive implementation causes problems...  :-)
153  */
154 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
155                                         struct ctdb_vnn *vnn)
156 {
157         struct ctdb_interface *i, *next;
158
159         /* For each interface, check if there's an IP using it. */
160         for (i = ctdb->ifaces; i != NULL; i = next) {
161                 struct ctdb_vnn *tv;
162                 bool found;
163                 next = i->next;
164
165                 /* Only consider interfaces named in the given VNN. */
166                 if (!vnn_has_interface_with_name(vnn, i->name)) {
167                         continue;
168                 }
169
170                 /* Search for a vnn with this interface. */
171                 found = false;
172                 for (tv=ctdb->vnn; tv; tv=tv->next) {
173                         if (vnn_has_interface_with_name(tv, i->name)) {
174                                 found = true;
175                                 break;
176                         }
177                 }
178
179                 if (!found) {
180                         /* None of the VNNs are using this interface. */
181                         DLIST_REMOVE(ctdb->ifaces, i);
182                         talloc_free(i);
183                 }
184         }
185 }
186
187
188 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
189                                               const char *iface)
190 {
191         struct ctdb_interface *i;
192
193         for (i=ctdb->ifaces;i;i=i->next) {
194                 if (strcmp(i->name, iface) == 0) {
195                         return i;
196                 }
197         }
198
199         return NULL;
200 }
201
202 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
203                                                   struct ctdb_vnn *vnn)
204 {
205         int i;
206         struct ctdb_interface *cur = NULL;
207         struct ctdb_interface *best = NULL;
208
209         for (i=0; vnn->ifaces[i]; i++) {
210
211                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
212                 if (cur == NULL) {
213                         continue;
214                 }
215
216                 if (!cur->link_up) {
217                         continue;
218                 }
219
220                 if (best == NULL) {
221                         best = cur;
222                         continue;
223                 }
224
225                 if (cur->references < best->references) {
226                         best = cur;
227                         continue;
228                 }
229         }
230
231         return best;
232 }
233
234 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
235                                      struct ctdb_vnn *vnn)
236 {
237         struct ctdb_interface *best = NULL;
238
239         if (vnn->iface) {
240                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241                                    "still assigned to iface '%s'\n",
242                                    ctdb_addr_to_str(&vnn->public_address),
243                                    ctdb_vnn_iface_string(vnn)));
244                 return 0;
245         }
246
247         best = ctdb_vnn_best_iface(ctdb, vnn);
248         if (best == NULL) {
249                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
250                                   "cannot assign to iface any iface\n",
251                                   ctdb_addr_to_str(&vnn->public_address)));
252                 return -1;
253         }
254
255         vnn->iface = best;
256         best->references++;
257         vnn->pnn = ctdb->pnn;
258
259         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
260                            "now assigned to iface '%s' refs[%d]\n",
261                            ctdb_addr_to_str(&vnn->public_address),
262                            ctdb_vnn_iface_string(vnn),
263                            best->references));
264         return 0;
265 }
266
267 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
268                                     struct ctdb_vnn *vnn)
269 {
270         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
271                            "now unassigned (old iface '%s' refs[%d])\n",
272                            ctdb_addr_to_str(&vnn->public_address),
273                            ctdb_vnn_iface_string(vnn),
274                            vnn->iface?vnn->iface->references:0));
275         if (vnn->iface) {
276                 vnn->iface->references--;
277         }
278         vnn->iface = NULL;
279         if (vnn->pnn == ctdb->pnn) {
280                 vnn->pnn = -1;
281         }
282 }
283
284 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
285                                struct ctdb_vnn *vnn)
286 {
287         int i;
288
289         /* Nodes that are not RUNNING can not host IPs */
290         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
291                 return false;
292         }
293
294         if (vnn->delete_pending) {
295                 return false;
296         }
297
298         if (vnn->iface && vnn->iface->link_up) {
299                 return true;
300         }
301
302         for (i=0; vnn->ifaces[i]; i++) {
303                 struct ctdb_interface *cur;
304
305                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
306                 if (cur == NULL) {
307                         continue;
308                 }
309
310                 if (cur->link_up) {
311                         return true;
312                 }
313         }
314
315         return false;
316 }
317
318 struct ctdb_takeover_arp {
319         struct ctdb_context *ctdb;
320         uint32_t count;
321         ctdb_sock_addr addr;
322         struct ctdb_tcp_array *tcparray;
323         struct ctdb_vnn *vnn;
324 };
325
326
327 /*
328   lists of tcp endpoints
329  */
330 struct ctdb_tcp_list {
331         struct ctdb_tcp_list *prev, *next;
332         struct ctdb_connection connection;
333 };
334
335 /*
336   list of clients to kill on IP release
337  */
338 struct ctdb_client_ip {
339         struct ctdb_client_ip *prev, *next;
340         struct ctdb_context *ctdb;
341         ctdb_sock_addr addr;
342         uint32_t client_id;
343 };
344
345
346 /*
347   send a gratuitous arp
348  */
349 static void ctdb_control_send_arp(struct tevent_context *ev,
350                                   struct tevent_timer *te,
351                                   struct timeval t, void *private_data)
352 {
353         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
354                                                         struct ctdb_takeover_arp);
355         int i, ret;
356         struct ctdb_tcp_array *tcparray;
357         const char *iface = ctdb_vnn_iface_string(arp->vnn);
358
359         ret = ctdb_sys_send_arp(&arp->addr, iface);
360         if (ret != 0) {
361                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
362                                   iface, strerror(errno)));
363         }
364
365         tcparray = arp->tcparray;
366         if (tcparray) {
367                 for (i=0;i<tcparray->num;i++) {
368                         struct ctdb_connection *tcon;
369
370                         tcon = &tcparray->connections[i];
371                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
372                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
373                                 ctdb_addr_to_str(&tcon->src),
374                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
375                         ret = ctdb_sys_send_tcp(
376                                 &tcon->src,
377                                 &tcon->dst,
378                                 0, 0, 0);
379                         if (ret != 0) {
380                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
381                                         ctdb_addr_to_str(&tcon->src)));
382                         }
383                 }
384         }
385
386         arp->count++;
387
388         if (arp->count == CTDB_ARP_REPEAT) {
389                 talloc_free(arp);
390                 return;
391         }
392
393         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
394                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
395                          ctdb_control_send_arp, arp);
396 }
397
398 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
399                                        struct ctdb_vnn *vnn)
400 {
401         struct ctdb_takeover_arp *arp;
402         struct ctdb_tcp_array *tcparray;
403
404         if (!vnn->takeover_ctx) {
405                 vnn->takeover_ctx = talloc_new(vnn);
406                 if (!vnn->takeover_ctx) {
407                         return -1;
408                 }
409         }
410
411         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
412         if (!arp) {
413                 return -1;
414         }
415
416         arp->ctdb = ctdb;
417         arp->addr = vnn->public_address;
418         arp->vnn  = vnn;
419
420         tcparray = vnn->tcp_array;
421         if (tcparray) {
422                 /* add all of the known tcp connections for this IP to the
423                    list of tcp connections to send tickle acks for */
424                 arp->tcparray = talloc_steal(arp, tcparray);
425
426                 vnn->tcp_array = NULL;
427                 vnn->tcp_update_needed = true;
428         }
429
430         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
431                          timeval_zero(), ctdb_control_send_arp, arp);
432
433         return 0;
434 }
435
436 struct takeover_callback_state {
437         struct ctdb_req_control_old *c;
438         ctdb_sock_addr *addr;
439         struct ctdb_vnn *vnn;
440 };
441
/* Private data handed to the takeip event callback */
struct ctdb_do_takeip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_vnn *vnn;		/* VNN being taken over */
};
446
447 /*
448   called when takeip event finishes
449  */
450 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
451                                     void *private_data)
452 {
453         struct ctdb_do_takeip_state *state =
454                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
455         int32_t ret;
456         TDB_DATA data;
457
458         if (status != 0) {
459                 if (status == -ETIME) {
460                         ctdb_ban_self(ctdb);
461                 }
462                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
463                                  ctdb_addr_to_str(&state->vnn->public_address),
464                                  ctdb_vnn_iface_string(state->vnn)));
465                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
466
467                 talloc_free(state);
468                 return;
469         }
470
471         if (ctdb->do_checkpublicip) {
472
473         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
474         if (ret != 0) {
475                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
476                 talloc_free(state);
477                 return;
478         }
479
480         }
481
482         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
483         data.dsize = strlen((char *)data.dptr) + 1;
484         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
485
486         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
487
488
489         /* the control succeeded */
490         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
491         talloc_free(state);
492         return;
493 }
494
495 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
496 {
497         state->vnn->update_in_flight = false;
498         return 0;
499 }
500
501 /*
502   take over an ip address
503  */
504 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
505                               struct ctdb_req_control_old *c,
506                               struct ctdb_vnn *vnn)
507 {
508         int ret;
509         struct ctdb_do_takeip_state *state;
510
511         if (vnn->update_in_flight) {
512                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
513                                     "update for this IP already in flight\n",
514                                     ctdb_addr_to_str(&vnn->public_address),
515                                     vnn->public_netmask_bits));
516                 return -1;
517         }
518
519         ret = ctdb_vnn_assign_iface(ctdb, vnn);
520         if (ret != 0) {
521                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
522                                  "assign a usable interface\n",
523                                  ctdb_addr_to_str(&vnn->public_address),
524                                  vnn->public_netmask_bits));
525                 return -1;
526         }
527
528         state = talloc(vnn, struct ctdb_do_takeip_state);
529         CTDB_NO_MEMORY(ctdb, state);
530
531         state->c = talloc_steal(ctdb, c);
532         state->vnn   = vnn;
533
534         vnn->update_in_flight = true;
535         talloc_set_destructor(state, ctdb_takeip_destructor);
536
537         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
538                             ctdb_addr_to_str(&vnn->public_address),
539                             vnn->public_netmask_bits,
540                             ctdb_vnn_iface_string(vnn)));
541
542         ret = ctdb_event_script_callback(ctdb,
543                                          state,
544                                          ctdb_do_takeip_callback,
545                                          state,
546                                          CTDB_EVENT_TAKE_IP,
547                                          "%s %s %u",
548                                          ctdb_vnn_iface_string(vnn),
549                                          ctdb_addr_to_str(&vnn->public_address),
550                                          vnn->public_netmask_bits);
551
552         if (ret != 0) {
553                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
554                         ctdb_addr_to_str(&vnn->public_address),
555                         ctdb_vnn_iface_string(vnn)));
556                 talloc_free(state);
557                 return -1;
558         }
559
560         return 0;
561 }
562
/* Private data handed to the updateip event callback */
struct ctdb_do_updateip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_interface *old;	/* interface the IP is moving from */
	struct ctdb_vnn *vnn;		/* VNN being moved */
};
568
569 /*
570   called when updateip event finishes
571  */
572 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
573                                       void *private_data)
574 {
575         struct ctdb_do_updateip_state *state =
576                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
577         int32_t ret;
578
579         if (status != 0) {
580                 if (status == -ETIME) {
581                         ctdb_ban_self(ctdb);
582                 }
583                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
584                         ctdb_addr_to_str(&state->vnn->public_address),
585                         state->old->name,
586                         ctdb_vnn_iface_string(state->vnn)));
587
588                 /*
589                  * All we can do is reset the old interface
590                  * and let the next run fix it
591                  */
592                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
593                 state->vnn->iface = state->old;
594                 state->vnn->iface->references++;
595
596                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
597                 talloc_free(state);
598                 return;
599         }
600
601         if (ctdb->do_checkpublicip) {
602
603         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
604         if (ret != 0) {
605                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
606                 talloc_free(state);
607                 return;
608         }
609
610         }
611
612         /* the control succeeded */
613         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
614         talloc_free(state);
615         return;
616 }
617
618 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
619 {
620         state->vnn->update_in_flight = false;
621         return 0;
622 }
623
624 /*
625   update (move) an ip address
626  */
627 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
628                                 struct ctdb_req_control_old *c,
629                                 struct ctdb_vnn *vnn)
630 {
631         int ret;
632         struct ctdb_do_updateip_state *state;
633         struct ctdb_interface *old = vnn->iface;
634         const char *new_name;
635
636         if (vnn->update_in_flight) {
637                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
638                                     "update for this IP already in flight\n",
639                                     ctdb_addr_to_str(&vnn->public_address),
640                                     vnn->public_netmask_bits));
641                 return -1;
642         }
643
644         ctdb_vnn_unassign_iface(ctdb, vnn);
645         ret = ctdb_vnn_assign_iface(ctdb, vnn);
646         if (ret != 0) {
647                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
648                                  "assin a usable interface (old iface '%s')\n",
649                                  ctdb_addr_to_str(&vnn->public_address),
650                                  vnn->public_netmask_bits,
651                                  old->name));
652                 return -1;
653         }
654
655         new_name = ctdb_vnn_iface_string(vnn);
656         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
657                 /* A benign update from one interface onto itself.
658                  * no need to run the eventscripts in this case, just return
659                  * success.
660                  */
661                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
662                 return 0;
663         }
664
665         state = talloc(vnn, struct ctdb_do_updateip_state);
666         CTDB_NO_MEMORY(ctdb, state);
667
668         state->c = talloc_steal(ctdb, c);
669         state->old = old;
670         state->vnn = vnn;
671
672         vnn->update_in_flight = true;
673         talloc_set_destructor(state, ctdb_updateip_destructor);
674
675         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
676                             "interface %s to %s\n",
677                             ctdb_addr_to_str(&vnn->public_address),
678                             vnn->public_netmask_bits,
679                             old->name,
680                             new_name));
681
682         ret = ctdb_event_script_callback(ctdb,
683                                          state,
684                                          ctdb_do_updateip_callback,
685                                          state,
686                                          CTDB_EVENT_UPDATE_IP,
687                                          "%s %s %s %u",
688                                          state->old->name,
689                                          new_name,
690                                          ctdb_addr_to_str(&vnn->public_address),
691                                          vnn->public_netmask_bits);
692         if (ret != 0) {
693                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
694                                  ctdb_addr_to_str(&vnn->public_address),
695                                  old->name, new_name));
696                 talloc_free(state);
697                 return -1;
698         }
699
700         return 0;
701 }
702
703 /*
704   Find the vnn of the node that has a public ip address
705   returns -1 if the address is not known as a public address
706  */
707 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
708 {
709         struct ctdb_vnn *vnn;
710
711         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
712                 if (ctdb_same_ip(&vnn->public_address, addr)) {
713                         return vnn;
714                 }
715         }
716
717         return NULL;
718 }
719
720 /*
721   take over an ip address
722  */
723 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
724                                  struct ctdb_req_control_old *c,
725                                  TDB_DATA indata,
726                                  bool *async_reply)
727 {
728         int ret;
729         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
730         struct ctdb_vnn *vnn;
731         bool have_ip = false;
732         bool do_updateip = false;
733         bool do_takeip = false;
734         struct ctdb_interface *best_iface = NULL;
735
736         if (pip->pnn != ctdb->pnn) {
737                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
738                                  "with pnn %d, but we're node %d\n",
739                                  ctdb_addr_to_str(&pip->addr),
740                                  pip->pnn, ctdb->pnn));
741                 return -1;
742         }
743
744         /* update out vnn list */
745         vnn = find_public_ip_vnn(ctdb, &pip->addr);
746         if (vnn == NULL) {
747                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
748                         ctdb_addr_to_str(&pip->addr)));
749                 return 0;
750         }
751
752         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
753                 have_ip = ctdb_sys_have_ip(&pip->addr);
754         }
755         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
756         if (best_iface == NULL) {
757                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
758                                  "a usable interface (old %s, have_ip %d)\n",
759                                  ctdb_addr_to_str(&vnn->public_address),
760                                  vnn->public_netmask_bits,
761                                  ctdb_vnn_iface_string(vnn),
762                                  have_ip));
763                 return -1;
764         }
765
766         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
767                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
768                 have_ip = false;
769         }
770
771
772         if (vnn->iface == NULL && have_ip) {
773                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
775                                  ctdb_addr_to_str(&vnn->public_address)));
776                 return 0;
777         }
778
779         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
780                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
781                                   "and we have it on iface[%s], but it was assigned to node %d"
782                                   "and we are node %d, banning ourself\n",
783                                  ctdb_addr_to_str(&vnn->public_address),
784                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
785                 ctdb_ban_self(ctdb);
786                 return -1;
787         }
788
789         if (vnn->pnn == -1 && have_ip) {
790                 vnn->pnn = ctdb->pnn;
791                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
792                                   "and we already have it on iface[%s], update local daemon\n",
793                                  ctdb_addr_to_str(&vnn->public_address),
794                                   ctdb_vnn_iface_string(vnn)));
795                 return 0;
796         }
797
798         if (vnn->iface) {
799                 if (vnn->iface != best_iface) {
800                         if (!vnn->iface->link_up) {
801                                 do_updateip = true;
802                         } else if (vnn->iface->references > (best_iface->references + 1)) {
803                                 /* only move when the rebalance gains something */
804                                         do_updateip = true;
805                         }
806                 }
807         }
808
809         if (!have_ip) {
810                 if (do_updateip) {
811                         ctdb_vnn_unassign_iface(ctdb, vnn);
812                         do_updateip = false;
813                 }
814                 do_takeip = true;
815         }
816
817         if (do_takeip) {
818                 ret = ctdb_do_takeip(ctdb, c, vnn);
819                 if (ret != 0) {
820                         return -1;
821                 }
822         } else if (do_updateip) {
823                 ret = ctdb_do_updateip(ctdb, c, vnn);
824                 if (ret != 0) {
825                         return -1;
826                 }
827         } else {
828                 /*
829                  * The interface is up and the kernel known the ip
830                  * => do nothing
831                  */
832                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833                         ctdb_addr_to_str(&pip->addr),
834                         vnn->public_netmask_bits,
835                         ctdb_vnn_iface_string(vnn)));
836                 return 0;
837         }
838
839         /* tell ctdb_control.c that we will be replying asynchronously */
840         *async_reply = true;
841
842         return 0;
843 }
844
845 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
846 {
847         DLIST_REMOVE(ctdb->vnn, vnn);
848         ctdb_vnn_unassign_iface(ctdb, vnn);
849         ctdb_remove_orphaned_ifaces(ctdb, vnn);
850         talloc_free(vnn);
851 }
852
853 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
854                                         struct ctdb_vnn *vnn,
855                                         ctdb_sock_addr *addr)
856 {
857         TDB_DATA data;
858
859         /* Send a message to all clients of this node telling them
860          * that the cluster has been reconfigured and they should
861          * close any connections on this IP address
862          */
863         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
864         data.dsize = strlen((char *)data.dptr)+1;
865         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
866         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
867
868         ctdb_vnn_unassign_iface(ctdb, vnn);
869
870         /* Process the IP if it has been marked for deletion */
871         if (vnn->delete_pending) {
872                 do_delete_ip(ctdb, vnn);
873                 return NULL;
874         }
875
876         return vnn;
877 }
878
879 /*
880   called when releaseip event finishes
881  */
882 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
883                                 void *private_data)
884 {
885         struct takeover_callback_state *state = 
886                 talloc_get_type(private_data, struct takeover_callback_state);
887
888         if (status == -ETIME) {
889                 ctdb_ban_self(ctdb);
890         }
891
892         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
893                 if  (ctdb_sys_have_ip(state->addr)) {
894                         DEBUG(DEBUG_ERR,
895                               ("IP %s still hosted during release IP callback, failing\n",
896                                ctdb_addr_to_str(state->addr)));
897                         ctdb_request_control_reply(ctdb, state->c,
898                                                    NULL, -1, NULL);
899                         talloc_free(state);
900                         return;
901                 }
902         }
903
904         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
905
906         /* the control succeeded */
907         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
908         talloc_free(state);
909 }
910
911 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
912 {
913         if (state->vnn != NULL) {
914                 state->vnn->update_in_flight = false;
915         }
916         return 0;
917 }
918
919 /*
920   release an ip address
921  */
922 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
923                                 struct ctdb_req_control_old *c,
924                                 TDB_DATA indata, 
925                                 bool *async_reply)
926 {
927         int ret;
928         struct takeover_callback_state *state;
929         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
930         struct ctdb_vnn *vnn;
931         char *iface;
932
933         /* update our vnn list */
934         vnn = find_public_ip_vnn(ctdb, &pip->addr);
935         if (vnn == NULL) {
936                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
937                         ctdb_addr_to_str(&pip->addr)));
938                 return 0;
939         }
940         vnn->pnn = pip->pnn;
941
942         /* stop any previous arps */
943         talloc_free(vnn->takeover_ctx);
944         vnn->takeover_ctx = NULL;
945
946         /* Some ctdb tool commands (e.g. moveip) send
947          * lazy multicast to drop an IP from any node that isn't the
948          * intended new node.  The following causes makes ctdbd ignore
949          * a release for any address it doesn't host.
950          */
951         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
952                 if (!ctdb_sys_have_ip(&pip->addr)) {
953                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
954                                 ctdb_addr_to_str(&pip->addr),
955                                 vnn->public_netmask_bits,
956                                 ctdb_vnn_iface_string(vnn)));
957                         ctdb_vnn_unassign_iface(ctdb, vnn);
958                         return 0;
959                 }
960         } else {
961                 if (vnn->iface == NULL) {
962                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
963                                            ctdb_addr_to_str(&pip->addr),
964                                            vnn->public_netmask_bits));
965                         return 0;
966                 }
967         }
968
969         /* There is a potential race between take_ip and us because we
970          * update the VNN via a callback that run when the
971          * eventscripts have been run.  Avoid the race by allowing one
972          * update to be in flight at a time.
973          */
974         if (vnn->update_in_flight) {
975                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
976                                     "update for this IP already in flight\n",
977                                     ctdb_addr_to_str(&vnn->public_address),
978                                     vnn->public_netmask_bits));
979                 return -1;
980         }
981
982         iface = strdup(ctdb_vnn_iface_string(vnn));
983
984         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
985                 ctdb_addr_to_str(&pip->addr),
986                 vnn->public_netmask_bits,
987                 iface,
988                 pip->pnn));
989
990         state = talloc(ctdb, struct takeover_callback_state);
991         if (state == NULL) {
992                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
993                                __FILE__, __LINE__);
994                 free(iface);
995                 return -1;
996         }
997
998         state->c = talloc_steal(state, c);
999         state->addr = talloc(state, ctdb_sock_addr);       
1000         if (state->addr == NULL) {
1001                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
1002                                __FILE__, __LINE__);
1003                 free(iface);
1004                 talloc_free(state);
1005                 return -1;
1006         }
1007         *state->addr = pip->addr;
1008         state->vnn   = vnn;
1009
1010         vnn->update_in_flight = true;
1011         talloc_set_destructor(state, ctdb_releaseip_destructor);
1012
1013         ret = ctdb_event_script_callback(ctdb, 
1014                                          state, release_ip_callback, state,
1015                                          CTDB_EVENT_RELEASE_IP,
1016                                          "%s %s %u",
1017                                          iface,
1018                                          ctdb_addr_to_str(&pip->addr),
1019                                          vnn->public_netmask_bits);
1020         free(iface);
1021         if (ret != 0) {
1022                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1023                         ctdb_addr_to_str(&pip->addr),
1024                         ctdb_vnn_iface_string(vnn)));
1025                 talloc_free(state);
1026                 return -1;
1027         }
1028
1029         /* tell the control that we will be reply asynchronously */
1030         *async_reply = true;
1031         return 0;
1032 }
1033
1034 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1035                                    ctdb_sock_addr *addr,
1036                                    unsigned mask, const char *ifaces,
1037                                    bool check_address)
1038 {
1039         struct ctdb_vnn      *vnn;
1040         uint32_t num = 0;
1041         char *tmp;
1042         const char *iface;
1043         int i;
1044         int ret;
1045
1046         tmp = strdup(ifaces);
1047         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1048                 if (!ctdb_sys_check_iface_exists(iface)) {
1049                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1050                         free(tmp);
1051                         return -1;
1052                 }
1053         }
1054         free(tmp);
1055
1056         /* Verify that we don't have an entry for this ip yet */
1057         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1058                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1059                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1060                                 ctdb_addr_to_str(addr)));
1061                         return -1;
1062                 }               
1063         }
1064
1065         /* create a new vnn structure for this ip address */
1066         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1067         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1068         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1069         tmp = talloc_strdup(vnn, ifaces);
1070         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1071         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1072                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1073                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1074                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1075                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1076                 num++;
1077         }
1078         talloc_free(tmp);
1079         vnn->ifaces[num] = NULL;
1080         vnn->public_address      = *addr;
1081         vnn->public_netmask_bits = mask;
1082         vnn->pnn                 = -1;
1083         if (check_address) {
1084                 if (ctdb_sys_have_ip(addr)) {
1085                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1086                         vnn->pnn = ctdb->pnn;
1087                 }
1088         }
1089
1090         for (i=0; vnn->ifaces[i]; i++) {
1091                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1092                 if (ret != 0) {
1093                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1094                                            "for public_address[%s]\n",
1095                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1096                         talloc_free(vnn);
1097                         return -1;
1098                 }
1099         }
1100
1101         DLIST_ADD(ctdb->vnn, vnn);
1102
1103         return 0;
1104 }
1105
1106 /*
1107   setup the public address lists from a file
1108 */
1109 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1110 {
1111         char **lines;
1112         int nlines;
1113         int i;
1114
1115         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1116         if (lines == NULL) {
1117                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1118                 return -1;
1119         }
1120         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1121                 nlines--;
1122         }
1123
1124         for (i=0;i<nlines;i++) {
1125                 unsigned mask;
1126                 ctdb_sock_addr addr;
1127                 const char *addrstr;
1128                 const char *ifaces;
1129                 char *tok, *line;
1130
1131                 line = lines[i];
1132                 while ((*line == ' ') || (*line == '\t')) {
1133                         line++;
1134                 }
1135                 if (*line == '#') {
1136                         continue;
1137                 }
1138                 if (strcmp(line, "") == 0) {
1139                         continue;
1140                 }
1141                 tok = strtok(line, " \t");
1142                 addrstr = tok;
1143                 tok = strtok(NULL, " \t");
1144                 if (tok == NULL) {
1145                         if (NULL == ctdb->default_public_interface) {
1146                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1147                                          i+1));
1148                                 talloc_free(lines);
1149                                 return -1;
1150                         }
1151                         ifaces = ctdb->default_public_interface;
1152                 } else {
1153                         ifaces = tok;
1154                 }
1155
1156                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1157                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1158                         talloc_free(lines);
1159                         return -1;
1160                 }
1161                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1162                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1163                         talloc_free(lines);
1164                         return -1;
1165                 }
1166         }
1167
1168
1169         talloc_free(lines);
1170         return 0;
1171 }
1172
1173 static struct ctdb_public_ip_list *
1174 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1175                              TALLOC_CTX *mem_ctx,
1176                              struct ctdb_node_map_old *nodemap,
1177                              uint32_t public_ip_flags)
1178 {
1179         int j, ret;
1180         struct ctdb_public_ip_list_old *ip_list;
1181         struct ctdb_public_ip_list *public_ips;
1182
1183         public_ips = talloc_zero_array(mem_ctx,
1184                                        struct ctdb_public_ip_list,
1185                                        nodemap->num);
1186         if (public_ips == NULL) {
1187                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1188                 return NULL;
1189         }
1190
1191         for (j = 0; j < nodemap->num; j++) {
1192                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1193                         continue;
1194                 }
1195
1196                 /* Retrieve the list of public IPs from the
1197                  * node. Flags says whether it is known or
1198                  * available. */
1199                 ret = ctdb_ctrl_get_public_ips_flags(
1200                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1201                         public_ip_flags, &ip_list);
1202                 if (ret != 0) {
1203                         DEBUG(DEBUG_ERR,
1204                               ("Failed to read public IPs from node: %u\n", j));
1205                         talloc_free(public_ips);
1206                         return NULL;
1207                 }
1208                 public_ips[j].num = ip_list->num;
1209                 if (ip_list->num == 0) {
1210                         talloc_free(ip_list);
1211                         continue;
1212                 }
1213                 public_ips[j].ip = talloc_zero_array(public_ips,
1214                                                      struct ctdb_public_ip,
1215                                                      ip_list->num);
1216                 if (public_ips[j].ip == NULL) {
1217                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1218                         talloc_free(public_ips);
1219                         return NULL;
1220                 }
1221                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1222                        sizeof(struct ctdb_public_ip) * ip_list->num);
1223                 talloc_free(ip_list);
1224         }
1225
1226         return public_ips;
1227 }
1228
/* State shared by the callbacks used while fetching a tunable from nodes */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched, used in log messages */
        const char *tunable;
        /* talloc'd array of fetched values, indexed by PNN */
        uint32_t *out;
        /* Set to true by the callbacks when an error should be
         * treated as fatal */
        bool fatal;
};
1234
1235 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1236                                  int32_t res, TDB_DATA outdata,
1237                                  void *callback)
1238 {
1239         struct get_tunable_callback_data *cd =
1240                 (struct get_tunable_callback_data *)callback;
1241         int size;
1242
1243         if (res != 0) {
1244                 /* Already handled in fail callback */
1245                 return;
1246         }
1247
1248         if (outdata.dsize != sizeof(uint32_t)) {
1249                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1250                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1251                                  (int)outdata.dsize));
1252                 cd->fatal = true;
1253                 return;
1254         }
1255
1256         size = talloc_array_length(cd->out);
1257         if (pnn >= size) {
1258                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1259                                  cd->tunable, pnn, size));
1260                 return;
1261         }
1262
1263                 
1264         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1265 }
1266
1267 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1268                                        int32_t res, TDB_DATA outdata,
1269                                        void *callback)
1270 {
1271         struct get_tunable_callback_data *cd =
1272                 (struct get_tunable_callback_data *)callback;
1273
1274         switch (res) {
1275         case -ETIME:
1276                 DEBUG(DEBUG_ERR,
1277                       ("Timed out getting tunable \"%s\" from node %d\n",
1278                        cd->tunable, pnn));
1279                 cd->fatal = true;
1280                 break;
1281         case -EINVAL:
1282         case -1:
1283                 DEBUG(DEBUG_WARNING,
1284                       ("Tunable \"%s\" not implemented on node %d\n",
1285                        cd->tunable, pnn));
1286                 break;
1287         default:
1288                 DEBUG(DEBUG_ERR,
1289                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1290                        cd->tunable, pnn));
1291                 cd->fatal = true;
1292         }
1293 }
1294
1295 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1296                                         TALLOC_CTX *tmp_ctx,
1297                                         struct ctdb_node_map_old *nodemap,
1298                                         const char *tunable,
1299                                         uint32_t default_value)
1300 {
1301         TDB_DATA data;
1302         struct ctdb_control_get_tunable *t;
1303         uint32_t *nodes;
1304         uint32_t *tvals;
1305         struct get_tunable_callback_data callback_data;
1306         int i;
1307
1308         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1309         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1310         for (i=0; i<nodemap->num; i++) {
1311                 tvals[i] = default_value;
1312         }
1313                 
1314         callback_data.out = tvals;
1315         callback_data.tunable = tunable;
1316         callback_data.fatal = false;
1317
1318         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1319         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1320         t = (struct ctdb_control_get_tunable *)data.dptr;
1321         t->length = strlen(tunable)+1;
1322         memcpy(t->name, tunable, t->length);
1323         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1324         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1325                                       nodes, 0, TAKEOVER_TIMEOUT(),
1326                                       false, data,
1327                                       get_tunable_callback,
1328                                       get_tunable_fail_callback,
1329                                       &callback_data) != 0) {
1330                 if (callback_data.fatal) {
1331                         talloc_free(tvals);
1332                         tvals = NULL;
1333                 }
1334         }
1335         talloc_free(nodes);
1336         talloc_free(data.dptr);
1337
1338         return tvals;
1339 }
1340
1341 static struct ctdb_node_map *
1342 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1343                          const struct ctdb_node_map_old *old)
1344 {
1345         struct ctdb_node_map *new;
1346
1347         new = talloc(mem_ctx, struct ctdb_node_map);
1348         if (new == NULL) {
1349                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1350                 return NULL;
1351         }
1352         new->num = old->num;
1353         new->node = talloc_zero_array(new,
1354                                       struct ctdb_node_and_flags, new->num);
1355         memcpy(new->node, &old->nodes[0],
1356                sizeof(struct ctdb_node_and_flags) * new->num);
1357
1358         return new;
1359 }
1360
1361
/*
 * Set the per-node flags in ipalloc_state from the node map and from
 * the NoIPTakeover and NoIPHostOnAllDisabled tunables fetched from
 * the nodes (both defaulting to 0).
 *
 * Temporary data is allocated off ipalloc_state.  It is freed here on
 * success; on failure it is left for the caller to clean up along
 * with ipalloc_state.
 *
 * Returns true on success, false if a tunable could not be fetched or
 * the node map could not be converted.
 */
static bool set_ipflags(struct ctdb_context *ctdb,
                        struct ipalloc_state *ipalloc_state,
                        struct ctdb_node_map_old *nodemap)
{
        uint32_t *no_takeover;
        uint32_t *no_host_all_disabled;
        struct ctdb_node_map *converted;

        no_takeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
                                             "NoIPTakeover", 0);
        if (no_takeover == NULL) {
                return false;
        }

        no_host_all_disabled = get_tunable_from_nodes(ctdb, ipalloc_state,
                                                      nodemap,
                                                      "NoIPHostOnAllDisabled",
                                                      0);
        if (no_host_all_disabled == NULL) {
                return false;
        }

        converted = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
        if (converted == NULL) {
                return false;
        }

        ipalloc_set_node_flags(ipalloc_state, converted,
                               no_takeover, no_host_all_disabled);

        /* The inputs are no longer needed once the flags are set */
        talloc_free(no_takeover);
        talloc_free(no_host_all_disabled);
        talloc_free(converted);

        return true;
}
1399
1400 static enum ipalloc_algorithm
1401 determine_algorithm(const struct ctdb_tunable_list *tunables)
1402 {
1403         if (1 == tunables->lcp2_public_ip_assignment) {
1404                 return IPALLOC_LCP2;
1405         } else if (1 == tunables->deterministic_public_ips) {
1406                 return IPALLOC_DETERMINISTIC;
1407         } else {
1408                 return IPALLOC_NONDETERMINISTIC;
1409         }
1410 }
1411
/* Failure bookkeeping for the controls sent during a takeover run */
struct takeover_callback_data {
        /* Number of entries in fail_count */
        uint32_t num_nodes;
        /* Number of failures recorded per node, indexed by PNN */
        unsigned int *fail_count;
};
1416
1417 static struct takeover_callback_data *
1418 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1419                             uint32_t num_nodes)
1420 {
1421         static struct takeover_callback_data *takeover_data;
1422
1423         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1424         if (takeover_data == NULL) {
1425                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1426                 return NULL;
1427         }
1428
1429         takeover_data->fail_count = talloc_zero_array(takeover_data,
1430                                                       unsigned int, num_nodes);
1431         if (takeover_data->fail_count == NULL) {
1432                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1433                 talloc_free(takeover_data);
1434                 return NULL;
1435         }
1436
1437         takeover_data->num_nodes = num_nodes;
1438
1439         return takeover_data;
1440 }
1441
1442 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1443                                        uint32_t node_pnn, int32_t res,
1444                                        TDB_DATA outdata, void *callback_data)
1445 {
1446         struct takeover_callback_data *cd =
1447                 talloc_get_type_abort(callback_data,
1448                                       struct takeover_callback_data);
1449
1450         if (node_pnn >= cd->num_nodes) {
1451                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1452                 return;
1453         }
1454
1455         if (cd->fail_count[node_pnn] == 0) {
1456                 DEBUG(DEBUG_ERR,
1457                       ("Node %u failed the takeover run\n", node_pnn));
1458         }
1459
1460         cd->fail_count[node_pnn]++;
1461 }
1462
1463 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1464                                           struct takeover_callback_data *tcd)
1465 {
1466         unsigned int max_fails = 0;
1467         uint32_t max_pnn = -1;
1468         uint32_t i;
1469
1470         for (i = 0; i < tcd->num_nodes; i++) {
1471                 if (tcd->fail_count[i] > max_fails) {
1472                         max_pnn = i;
1473                         max_fails = tcd->fail_count[i];
1474                 }
1475         }
1476
1477         if (max_fails > 0) {
1478                 int ret;
1479                 TDB_DATA data;
1480
1481                 DEBUG(DEBUG_ERR,
1482                       ("Sending banning credits to %u with fail count %u\n",
1483                        max_pnn, max_fails));
1484
1485                 data.dptr = (uint8_t *)&max_pnn;
1486                 data.dsize = sizeof(uint32_t);
1487                 ret = ctdb_client_send_message(ctdb,
1488                                                CTDB_BROADCAST_CONNECTED,
1489                                                CTDB_SRVID_BANNING,
1490                                                data);
1491                 if (ret != 0) {
1492                         DEBUG(DEBUG_ERR,
1493                               ("Failed to set banning credits for node %u\n",
1494                                max_pnn));
1495                 }
1496         }
1497 }
1498
1499 /*
1500  * Recalculate the allocation of public IPs to nodes and have the
1501  * nodes host their allocated addresses.
1502  *
1503  * - Initialise IP allocation state.  Pass:
1504      + algorithm to be used;
1505      + whether IP rebalancing ("failback") should be done (this uses a
1506        cluster-wide configuration variable and only the value from the
1507        master node is used); and
1508  *   + list of nodes to force rebalance (internal structure, currently
1509  *     no way to fetch, only used by LCP2 for nodes that have had new
1510  *     IP addresses added).
1511  * - Set IP flags for IP allocation based on node map and tunables
1512  *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
1513  *   (tunable fetching done separately so values can be faked in unit
1514  *   testing)
1515  * - Retrieve known and available IP addresses (done separately so
1516  *   values can be faked in unit testing)
1517  * - Use ipalloc_set_public_ips() to set known and available IP
1518      addresses for allocation
1519  * - If cluster can't host IP addresses then early exit
1520  * - Run IP allocation algorithm
1521  * - Send RELEASE_IP to all nodes for IPs they should not host
1522  * - Send TAKE_IP to all nodes for IPs they should host
1523  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1524  */
1525 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1526                       uint32_t *force_rebalance_nodes)
1527 {
1528         int i, ret;
1529         struct ctdb_public_ip ip;
1530         uint32_t *nodes;
1531         struct public_ip_list *all_ips, *tmp_ip;
1532         TDB_DATA data;
1533         struct timeval timeout;
1534         struct client_async_data *async_data;
1535         struct ctdb_client_control_state *state;
1536         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1537         struct ipalloc_state *ipalloc_state;
1538         struct ctdb_public_ip_list *known_ips, *available_ips;
1539         struct takeover_callback_data *takeover_data;
1540
1541         /* Initialise fail callback data to be used with
1542          * takeover_run_fail_callback().  A failure in any of the
1543          * following steps will cause an early return, so this can be
1544          * reused for each of those steps without re-initialising. */
1545         takeover_data = takeover_callback_data_init(tmp_ctx,
1546                                                     nodemap->num);
1547         if (takeover_data == NULL) {
1548                 talloc_free(tmp_ctx);
1549                 return -1;
1550         }
1551
1552         /* Each of the later stages (RELEASE_IP, TAKEOVER_IP,
1553          * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1554          * seconds.  However, RELEASE_IP can take longer due to TCP
1555          * connection killing, so sometimes needs more time.
1556          * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1557          * seconds across all 3 stages.  No explicit expiry checks are
1558          * needed before each stage because tevent is smart enough to
1559          * fire the timeouts even if they are in the past.  Initialise
1560          * this here to cope with early jumps to IPREALLOCATED. */
1561         timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0);
1562
1563         /*
1564          * ip failover is completely disabled, just send out the 
1565          * ipreallocated event.
1566          */
1567         if (ctdb->tunable.disable_ip_failover != 0) {
1568                 goto ipreallocated;
1569         }
1570
1571         ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1572                                            determine_algorithm(&ctdb->tunable),
1573                                            (ctdb->tunable.no_ip_failback != 0),
1574                                            force_rebalance_nodes);
1575         if (ipalloc_state == NULL) {
1576                 talloc_free(tmp_ctx);
1577                 return -1;
1578         }
1579
1580         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1581                 DEBUG(DEBUG_ERR,
1582                       ("Failed to set IP flags - aborting takeover run\n"));
1583                 talloc_free(tmp_ctx);
1584                 return -1;
1585         }
1586
1587         /* Fetch known/available public IPs from each active node */
1588         /* Fetch lists of known public IPs from all nodes */
1589         known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1590                                                  nodemap, 0);
1591         if (known_ips == NULL) {
1592                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1593                 talloc_free(tmp_ctx);
1594                 return -1;
1595         }
1596         available_ips = ctdb_fetch_remote_public_ips(
1597                 ctdb, ipalloc_state, nodemap,
1598                 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1599         if (available_ips == NULL) {
1600                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1601                 talloc_free(tmp_ctx);
1602                 return -1;
1603         }
1604
1605         if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1606                 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1607                 talloc_free(tmp_ctx);
1608                 return -1;
1609         }
1610
1611         if (! ipalloc_can_host_ips(ipalloc_state)) {
1612                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1613                 goto ipreallocated;
1614         }
1615
1616         /* Do the IP reassignment calculations */
1617         all_ips = ipalloc(ipalloc_state);
1618         if (all_ips == NULL) {
1619                 talloc_free(tmp_ctx);
1620                 return -1;
1621         }
1622
1623         /* Now tell all nodes to release any public IPs they should
1624          * not host.  This will be a NOOP on nodes that don't
1625          * currently hold the given IP.
1626          */
1627         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1628         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1629
1630         async_data->fail_callback = takeover_run_fail_callback;
1631         async_data->callback_data = takeover_data;
1632
1633         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1634
1635         /* Send a RELEASE_IP to all nodes that should not be hosting
1636          * each IP.  For each IP, all but one of these will be
1637          * redundant.  However, the redundant ones are used to tell
1638          * nodes which node should be hosting the IP so that commands
1639          * like "ctdb ip" can display a particular nodes idea of who
1640          * is hosting what. */
1641         for (i=0;i<nodemap->num;i++) {
1642                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1643                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1644                         continue;
1645                 }
1646
1647                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1648                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1649                                 /* This node should be serving this
1650                                    vnn so don't tell it to release the ip
1651                                 */
1652                                 continue;
1653                         }
1654                         ip.pnn  = tmp_ip->pnn;
1655                         ip.addr = tmp_ip->addr;
1656
1657                         data.dsize = sizeof(ip);
1658                         data.dptr  = (uint8_t *)&ip;
1659                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1660                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1661                                                   data, async_data,
1662                                                   &timeout, NULL);
1663                         if (state == NULL) {
1664                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1665                                 talloc_free(tmp_ctx);
1666                                 return -1;
1667                         }
1668
1669                         ctdb_client_async_add(async_data, state);
1670                 }
1671         }
1672         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1673                 DEBUG(DEBUG_ERR,
1674                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1675                 goto fail;
1676         }
1677         talloc_free(async_data);
1678
1679
1680         /* For each IP, send a TAKEOVER_IP to the node that should be
1681          * hosting it.  Many of these will often be redundant (since
1682          * the allocation won't have changed) but they can be useful
1683          * to recover from inconsistencies. */
1684         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1685         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1686
1687         async_data->fail_callback = takeover_run_fail_callback;
1688         async_data->callback_data = takeover_data;
1689
1690         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1691                 if (tmp_ip->pnn == -1) {
1692                         /* this IP won't be taken over */
1693                         continue;
1694                 }
1695
1696                 ip.pnn  = tmp_ip->pnn;
1697                 ip.addr = tmp_ip->addr;
1698
1699                 data.dsize = sizeof(ip);
1700                 data.dptr  = (uint8_t *)&ip;
1701                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1702                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1703                                           data, async_data, &timeout, NULL);
1704                 if (state == NULL) {
1705                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1706                         talloc_free(tmp_ctx);
1707                         return -1;
1708                 }
1709
1710                 ctdb_client_async_add(async_data, state);
1711         }
1712         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1713                 DEBUG(DEBUG_ERR,
1714                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1715                 goto fail;
1716         }
1717
1718 ipreallocated:
1719         /*
1720          * Tell all nodes to run eventscripts to process the
1721          * "ipreallocated" event.  This can do a lot of things,
1722          * including restarting services to reconfigure them if public
1723          * IPs have moved.  Once upon a time this event only used to
1724          * update natgw.
1725          */
1726         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1727         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1728                                         nodes, 0, timeout,
1729                                         false, tdb_null,
1730                                         NULL, takeover_run_fail_callback,
1731                                         takeover_data);
1732         if (ret != 0) {
1733                 DEBUG(DEBUG_ERR,
1734                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1735                 goto fail;
1736         }
1737
1738         talloc_free(tmp_ctx);
1739         return ret;
1740
1741 fail:
1742         takeover_run_process_failures(ctdb, takeover_data);
1743         talloc_free(tmp_ctx);
1744         return -1;
1745 }
1746
1747
1748 /*
1749   destroy a ctdb_client_ip structure
1750  */
1751 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1752 {
1753         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1754                 ctdb_addr_to_str(&ip->addr),
1755                 ntohs(ip->addr.ip.sin_port),
1756                 ip->client_id));
1757
1758         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1759         return 0;
1760 }
1761
1762 /*
1763   called by a client to inform us of a TCP connection that it is managing
1764   that should tickled with an ACK when IP takeover is done
1765  */
1766 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1767                                 TDB_DATA indata)
1768 {
1769         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1770         struct ctdb_connection *tcp_sock = NULL;
1771         struct ctdb_tcp_list *tcp;
1772         struct ctdb_connection t;
1773         int ret;
1774         TDB_DATA data;
1775         struct ctdb_client_ip *ip;
1776         struct ctdb_vnn *vnn;
1777         ctdb_sock_addr addr;
1778
1779         /* If we don't have public IPs, tickles are useless */
1780         if (ctdb->vnn == NULL) {
1781                 return 0;
1782         }
1783
1784         tcp_sock = (struct ctdb_connection *)indata.dptr;
1785
1786         addr = tcp_sock->src;
1787         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1788         addr = tcp_sock->dst;
1789         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1790
1791         ZERO_STRUCT(addr);
1792         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1793         vnn = find_public_ip_vnn(ctdb, &addr);
1794         if (vnn == NULL) {
1795                 switch (addr.sa.sa_family) {
1796                 case AF_INET:
1797                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1798                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1799                                         ctdb_addr_to_str(&addr)));
1800                         }
1801                         break;
1802                 case AF_INET6:
1803                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1804                                 ctdb_addr_to_str(&addr)));
1805                         break;
1806                 default:
1807                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1808                 }
1809
1810                 return 0;
1811         }
1812
1813         if (vnn->pnn != ctdb->pnn) {
1814                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1815                         ctdb_addr_to_str(&addr),
1816                         client_id, client->pid));
1817                 /* failing this call will tell smbd to die */
1818                 return -1;
1819         }
1820
1821         ip = talloc(client, struct ctdb_client_ip);
1822         CTDB_NO_MEMORY(ctdb, ip);
1823
1824         ip->ctdb      = ctdb;
1825         ip->addr      = addr;
1826         ip->client_id = client_id;
1827         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1828         DLIST_ADD(ctdb->client_ip_list, ip);
1829
1830         tcp = talloc(client, struct ctdb_tcp_list);
1831         CTDB_NO_MEMORY(ctdb, tcp);
1832
1833         tcp->connection.src = tcp_sock->src;
1834         tcp->connection.dst = tcp_sock->dst;
1835
1836         DLIST_ADD(client->tcp_list, tcp);
1837
1838         t.src = tcp_sock->src;
1839         t.dst = tcp_sock->dst;
1840
1841         data.dptr = (uint8_t *)&t;
1842         data.dsize = sizeof(t);
1843
1844         switch (addr.sa.sa_family) {
1845         case AF_INET:
1846                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1847                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1848                         ctdb_addr_to_str(&tcp_sock->src),
1849                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1850                 break;
1851         case AF_INET6:
1852                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1853                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1854                         ctdb_addr_to_str(&tcp_sock->src),
1855                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1856                 break;
1857         default:
1858                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1859         }
1860
1861
1862         /* tell all nodes about this tcp connection */
1863         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1864                                        CTDB_CONTROL_TCP_ADD,
1865                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1866         if (ret != 0) {
1867                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1868                 return -1;
1869         }
1870
1871         return 0;
1872 }
1873
1874 /*
1875   find a tcp address on a list
1876  */
1877 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1878                                            struct ctdb_connection *tcp)
1879 {
1880         int i;
1881
1882         if (array == NULL) {
1883                 return NULL;
1884         }
1885
1886         for (i=0;i<array->num;i++) {
1887                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1888                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1889                         return &array->connections[i];
1890                 }
1891         }
1892         return NULL;
1893 }
1894
1895
1896
1897 /*
1898   called by a daemon to inform us of a TCP connection that one of its
1899   clients managing that should tickled with an ACK when IP takeover is
1900   done
1901  */
1902 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1903 {
1904         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1905         struct ctdb_tcp_array *tcparray;
1906         struct ctdb_connection tcp;
1907         struct ctdb_vnn *vnn;
1908
1909         /* If we don't have public IPs, tickles are useless */
1910         if (ctdb->vnn == NULL) {
1911                 return 0;
1912         }
1913
1914         vnn = find_public_ip_vnn(ctdb, &p->dst);
1915         if (vnn == NULL) {
1916                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1917                         ctdb_addr_to_str(&p->dst)));
1918
1919                 return -1;
1920         }
1921
1922
1923         tcparray = vnn->tcp_array;
1924
1925         /* If this is the first tickle */
1926         if (tcparray == NULL) {
1927                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1928                 CTDB_NO_MEMORY(ctdb, tcparray);
1929                 vnn->tcp_array = tcparray;
1930
1931                 tcparray->num = 0;
1932                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1933                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1934
1935                 tcparray->connections[tcparray->num].src = p->src;
1936                 tcparray->connections[tcparray->num].dst = p->dst;
1937                 tcparray->num++;
1938
1939                 if (tcp_update_needed) {
1940                         vnn->tcp_update_needed = true;
1941                 }
1942                 return 0;
1943         }
1944
1945
1946         /* Do we already have this tickle ?*/
1947         tcp.src = p->src;
1948         tcp.dst = p->dst;
1949         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1950                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1951                         ctdb_addr_to_str(&tcp.dst),
1952                         ntohs(tcp.dst.ip.sin_port),
1953                         vnn->pnn));
1954                 return 0;
1955         }
1956
1957         /* A new tickle, we must add it to the array */
1958         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1959                                         struct ctdb_connection,
1960                                         tcparray->num+1);
1961         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1962
1963         tcparray->connections[tcparray->num].src = p->src;
1964         tcparray->connections[tcparray->num].dst = p->dst;
1965         tcparray->num++;
1966
1967         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1968                 ctdb_addr_to_str(&tcp.dst),
1969                 ntohs(tcp.dst.ip.sin_port),
1970                 vnn->pnn));
1971
1972         if (tcp_update_needed) {
1973                 vnn->tcp_update_needed = true;
1974         }
1975
1976         return 0;
1977 }
1978
1979
1980 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1981 {
1982         struct ctdb_connection *tcpp;
1983
1984         if (vnn == NULL) {
1985                 return;
1986         }
1987
1988         /* if the array is empty we cant remove it
1989            and we don't need to do anything
1990          */
1991         if (vnn->tcp_array == NULL) {
1992                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1993                         ctdb_addr_to_str(&conn->dst),
1994                         ntohs(conn->dst.ip.sin_port)));
1995                 return;
1996         }
1997
1998
1999         /* See if we know this connection
2000            if we don't know this connection  then we dont need to do anything
2001          */
2002         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2003         if (tcpp == NULL) {
2004                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2005                         ctdb_addr_to_str(&conn->dst),
2006                         ntohs(conn->dst.ip.sin_port)));
2007                 return;
2008         }
2009
2010
2011         /* We need to remove this entry from the array.
2012            Instead of allocating a new array and copying data to it
2013            we cheat and just copy the last entry in the existing array
2014            to the entry that is to be removed and just shring the 
2015            ->num field
2016          */
2017         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2018         vnn->tcp_array->num--;
2019
2020         /* If we deleted the last entry we also need to remove the entire array
2021          */
2022         if (vnn->tcp_array->num == 0) {
2023                 talloc_free(vnn->tcp_array);
2024                 vnn->tcp_array = NULL;
2025         }               
2026
2027         vnn->tcp_update_needed = true;
2028
2029         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2030                 ctdb_addr_to_str(&conn->src),
2031                 ntohs(conn->src.ip.sin_port)));
2032 }
2033
2034
2035 /*
2036   called by a daemon to inform us of a TCP connection that one of its
2037   clients used are no longer needed in the tickle database
2038  */
2039 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2040 {
2041         struct ctdb_vnn *vnn;
2042         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2043
2044         /* If we don't have public IPs, tickles are useless */
2045         if (ctdb->vnn == NULL) {
2046                 return 0;
2047         }
2048
2049         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2050         if (vnn == NULL) {
2051                 DEBUG(DEBUG_ERR,
2052                       (__location__ " unable to find public address %s\n",
2053                        ctdb_addr_to_str(&conn->dst)));
2054                 return 0;
2055         }
2056
2057         ctdb_remove_connection(vnn, conn);
2058
2059         return 0;
2060 }
2061
2062
2063 /*
2064   Called when another daemon starts - causes all tickles for all
2065   public addresses we are serving to be sent to the new node on the
2066   next check.  This actually causes the next scheduled call to
2067   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2068   doesn't require careful error handling.
2069  */
2070 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2071 {
2072         struct ctdb_vnn *vnn;
2073
2074         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2075                            (unsigned long) pnn));
2076
2077         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2078                 vnn->tcp_update_needed = true;
2079         }
2080
2081         return 0;
2082 }
2083
2084
2085 /*
2086   called when a client structure goes away - hook to remove
2087   elements from the tcp_list in all daemons
2088  */
2089 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2090 {
2091         while (client->tcp_list) {
2092                 struct ctdb_vnn *vnn;
2093                 struct ctdb_tcp_list *tcp = client->tcp_list;
2094                 struct ctdb_connection *conn = &tcp->connection;
2095
2096                 DLIST_REMOVE(client->tcp_list, tcp);
2097
2098                 vnn = find_public_ip_vnn(client->ctdb,
2099                                          &conn->dst);
2100                 if (vnn == NULL) {
2101                         DEBUG(DEBUG_ERR,
2102                               (__location__ " unable to find public address %s\n",
2103                                ctdb_addr_to_str(&conn->dst)));
2104                         continue;
2105                 }
2106
2107                 /* If the IP address is hosted on this node then
2108                  * remove the connection. */
2109                 if (vnn->pnn == client->ctdb->pnn) {
2110                         ctdb_remove_connection(vnn, conn);
2111                 }
2112
2113                 /* Otherwise this function has been called because the
2114                  * server IP address has been released to another node
2115                  * and the client has exited.  This means that we
2116                  * should not delete the connection information.  The
2117                  * takeover node processes connections too. */
2118         }
2119 }
2120
2121
2122 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2123 {
2124         struct ctdb_vnn *vnn, *next;
2125         int count = 0;
2126
2127         if (ctdb->tunable.disable_ip_failover == 1) {
2128                 return;
2129         }
2130
2131         for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
2132                 /* vnn can be freed below in release_ip_post() */
2133                 next = vnn->next;
2134
2135                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2136                         ctdb_vnn_unassign_iface(ctdb, vnn);
2137                         continue;
2138                 }
2139
2140                 /* Don't allow multiple releases at once.  Some code,
2141                  * particularly ctdb_tickle_sentenced_connections() is
2142                  * not re-entrant */
2143                 if (vnn->update_in_flight) {
2144                         DEBUG(DEBUG_WARNING,
2145                               (__location__
2146                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2147                                     ctdb_addr_to_str(&vnn->public_address),
2148                                     vnn->public_netmask_bits,
2149                                     ctdb_vnn_iface_string(vnn)));
2150                         continue;
2151                 }
2152                 vnn->update_in_flight = true;
2153
2154                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2155                                     ctdb_addr_to_str(&vnn->public_address),
2156                                     vnn->public_netmask_bits,
2157                                     ctdb_vnn_iface_string(vnn)));
2158
2159                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2160                                        ctdb_vnn_iface_string(vnn),
2161                                        ctdb_addr_to_str(&vnn->public_address),
2162                                        vnn->public_netmask_bits);
2163                 /* releaseip timeouts are converted to success, so to
2164                  * detect failures just check if the IP address is
2165                  * still there...
2166                  */
2167                 if (ctdb_sys_have_ip(&vnn->public_address)) {
2168                         DEBUG(DEBUG_ERR,
2169                               (__location__
2170                                " IP address %s not released\n",
2171                                ctdb_addr_to_str(&vnn->public_address)));
2172                         vnn->update_in_flight = false;
2173                         continue;
2174                 }
2175
2176                 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
2177                 if (vnn != NULL) {
2178                         vnn->update_in_flight = false;
2179                 }
2180                 count++;
2181         }
2182
2183         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2184 }
2185
2186
2187 /*
2188   get list of public IPs
2189  */
2190 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2191                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2192 {
2193         int i, num, len;
2194         struct ctdb_public_ip_list_old *ips;
2195         struct ctdb_vnn *vnn;
2196         bool only_available = false;
2197
2198         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2199                 only_available = true;
2200         }
2201
2202         /* count how many public ip structures we have */
2203         num = 0;
2204         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2205                 num++;
2206         }
2207
2208         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2209                 num*sizeof(struct ctdb_public_ip);
2210         ips = talloc_zero_size(outdata, len);
2211         CTDB_NO_MEMORY(ctdb, ips);
2212
2213         i = 0;
2214         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2215                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2216                         continue;
2217                 }
2218                 ips->ips[i].pnn  = vnn->pnn;
2219                 ips->ips[i].addr = vnn->public_address;
2220                 i++;
2221         }
2222         ips->num = i;
2223         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2224                 i*sizeof(struct ctdb_public_ip);
2225
2226         outdata->dsize = len;
2227         outdata->dptr  = (uint8_t *)ips;
2228
2229         return 0;
2230 }
2231
2232
2233 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2234                                         struct ctdb_req_control_old *c,
2235                                         TDB_DATA indata,
2236                                         TDB_DATA *outdata)
2237 {
2238         int i, num, len;
2239         ctdb_sock_addr *addr;
2240         struct ctdb_public_ip_info_old *info;
2241         struct ctdb_vnn *vnn;
2242
2243         addr = (ctdb_sock_addr *)indata.dptr;
2244
2245         vnn = find_public_ip_vnn(ctdb, addr);
2246         if (vnn == NULL) {
2247                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2248                                  "'%s'not a public address\n",
2249                                  ctdb_addr_to_str(addr)));
2250                 return -1;
2251         }
2252
2253         /* count how many public ip structures we have */
2254         num = 0;
2255         for (;vnn->ifaces[num];) {
2256                 num++;
2257         }
2258
2259         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2260                 num*sizeof(struct ctdb_iface);
2261         info = talloc_zero_size(outdata, len);
2262         CTDB_NO_MEMORY(ctdb, info);
2263
2264         info->ip.addr = vnn->public_address;
2265         info->ip.pnn = vnn->pnn;
2266         info->active_idx = 0xFFFFFFFF;
2267
2268         for (i=0; vnn->ifaces[i]; i++) {
2269                 struct ctdb_interface *cur;
2270
2271                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2272                 if (cur == NULL) {
2273                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2274                                            vnn->ifaces[i]));
2275                         return -1;
2276                 }
2277                 if (vnn->iface == cur) {
2278                         info->active_idx = i;
2279                 }
2280                 strncpy(info->ifaces[i].name, cur->name,
2281                         sizeof(info->ifaces[i].name));
2282                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2283                 info->ifaces[i].link_state = cur->link_up;
2284                 info->ifaces[i].references = cur->references;
2285         }
2286         info->num = i;
2287         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2288                 i*sizeof(struct ctdb_iface);
2289
2290         outdata->dsize = len;
2291         outdata->dptr  = (uint8_t *)info;
2292
2293         return 0;
2294 }
2295
2296 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2297                                 struct ctdb_req_control_old *c,
2298                                 TDB_DATA *outdata)
2299 {
2300         int i, num, len;
2301         struct ctdb_iface_list_old *ifaces;
2302         struct ctdb_interface *cur;
2303
2304         /* count how many public ip structures we have */
2305         num = 0;
2306         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2307                 num++;
2308         }
2309
2310         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2311                 num*sizeof(struct ctdb_iface);
2312         ifaces = talloc_zero_size(outdata, len);
2313         CTDB_NO_MEMORY(ctdb, ifaces);
2314
2315         i = 0;
2316         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2317                 strncpy(ifaces->ifaces[i].name, cur->name,
2318                         sizeof(ifaces->ifaces[i].name));
2319                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2320                 ifaces->ifaces[i].link_state = cur->link_up;
2321                 ifaces->ifaces[i].references = cur->references;
2322                 i++;
2323         }
2324         ifaces->num = i;
2325         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2326                 i*sizeof(struct ctdb_iface);
2327
2328         outdata->dsize = len;
2329         outdata->dptr  = (uint8_t *)ifaces;
2330
2331         return 0;
2332 }
2333
2334 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2335                                     struct ctdb_req_control_old *c,
2336                                     TDB_DATA indata)
2337 {
2338         struct ctdb_iface *info;
2339         struct ctdb_interface *iface;
2340         bool link_up = false;
2341
2342         info = (struct ctdb_iface *)indata.dptr;
2343
2344         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2345                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2346                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2347                                   len, len, info->name));
2348                 return -1;
2349         }
2350
2351         switch (info->link_state) {
2352         case 0:
2353                 link_up = false;
2354                 break;
2355         case 1:
2356                 link_up = true;
2357                 break;
2358         default:
2359                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2360                                   (unsigned int)info->link_state));
2361                 return -1;
2362         }
2363
2364         if (info->references != 0) {
2365                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2366                                   (unsigned int)info->references));
2367                 return -1;
2368         }
2369
2370         iface = ctdb_find_iface(ctdb, info->name);
2371         if (iface == NULL) {
2372                 return -1;
2373         }
2374
2375         if (link_up == iface->link_up) {
2376                 return 0;
2377         }
2378
2379         DEBUG(DEBUG_ERR,
2380               ("iface[%s] has changed it's link status %s => %s\n",
2381                iface->name,
2382                iface->link_up?"up":"down",
2383                link_up?"up":"down"));
2384
2385         iface->link_up = link_up;
2386         return 0;
2387 }
2388
2389
2390 /*
2391   called by a daemon to inform us of the entire list of TCP tickles for
2392   a particular public address.
2393   this control should only be sent by the node that is currently serving
2394   that public address.
2395  */
2396 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2397 {
2398         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2399         struct ctdb_tcp_array *tcparray;
2400         struct ctdb_vnn *vnn;
2401
2402         /* We must at least have tickles.num or else we cant verify the size
2403            of the received data blob
2404          */
2405         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2406                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2407                 return -1;
2408         }
2409
2410         /* verify that the size of data matches what we expect */
2411         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2412                          + sizeof(struct ctdb_connection) * list->num) {
2413                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2414                 return -1;
2415         }
2416
2417         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2418                            ctdb_addr_to_str(&list->addr)));
2419
2420         vnn = find_public_ip_vnn(ctdb, &list->addr);
2421         if (vnn == NULL) {
2422                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2423                         ctdb_addr_to_str(&list->addr)));
2424
2425                 return 1;
2426         }
2427
2428         if (vnn->pnn == ctdb->pnn) {
2429                 DEBUG(DEBUG_INFO,
2430                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2431                        ctdb_addr_to_str(&list->addr)));
2432                 return 0;
2433         }
2434
2435         /* remove any old ticklelist we might have */
2436         talloc_free(vnn->tcp_array);
2437         vnn->tcp_array = NULL;
2438
2439         tcparray = talloc(vnn, struct ctdb_tcp_array);
2440         CTDB_NO_MEMORY(ctdb, tcparray);
2441
2442         tcparray->num = list->num;
2443
2444         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2445         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2446
2447         memcpy(tcparray->connections, &list->connections[0],
2448                sizeof(struct ctdb_connection)*tcparray->num);
2449
2450         /* We now have a new fresh tickle list array for this vnn */
2451         vnn->tcp_array = tcparray;
2452
2453         return 0;
2454 }
2455
2456 /*
2457   called to return the full list of tickles for the puclic address associated 
2458   with the provided vnn
2459  */
2460 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2461 {
2462         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2463         struct ctdb_tickle_list_old *list;
2464         struct ctdb_tcp_array *tcparray;
2465         int num, i;
2466         struct ctdb_vnn *vnn;
2467         unsigned port;
2468
2469         vnn = find_public_ip_vnn(ctdb, addr);
2470         if (vnn == NULL) {
2471                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2472                         ctdb_addr_to_str(addr)));
2473
2474                 return 1;
2475         }
2476
2477         port = ctdb_addr_to_port(addr);
2478
2479         tcparray = vnn->tcp_array;
2480         num = 0;
2481         if (tcparray != NULL) {
2482                 if (port == 0) {
2483                         /* All connections */
2484                         num = tcparray->num;
2485                 } else {
2486                         /* Count connections for port */
2487                         for (i = 0; i < tcparray->num; i++) {
2488                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2489                                         num++;
2490                                 }
2491                         }
2492                 }
2493         }
2494
2495         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2496                         + sizeof(struct ctdb_connection) * num;
2497
2498         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2499         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2500         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2501
2502         list->addr = *addr;
2503         list->num = num;
2504
2505         if (num == 0) {
2506                 return 0;
2507         }
2508
2509         num = 0;
2510         for (i = 0; i < tcparray->num; i++) {
2511                 if (port == 0 || \
2512                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2513                         list->connections[num] = tcparray->connections[i];
2514                         num++;
2515                 }
2516         }
2517
2518         return 0;
2519 }
2520
2521
2522 /*
2523   set the list of all tcp tickles for a public address
2524  */
2525 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2526                                             ctdb_sock_addr *addr,
2527                                             struct ctdb_tcp_array *tcparray)
2528 {
2529         int ret, num;
2530         TDB_DATA data;
2531         struct ctdb_tickle_list_old *list;
2532
2533         if (tcparray) {
2534                 num = tcparray->num;
2535         } else {
2536                 num = 0;
2537         }
2538
2539         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2540                         sizeof(struct ctdb_connection) * num;
2541         data.dptr = talloc_size(ctdb, data.dsize);
2542         CTDB_NO_MEMORY(ctdb, data.dptr);
2543
2544         list = (struct ctdb_tickle_list_old *)data.dptr;
2545         list->addr = *addr;
2546         list->num = num;
2547         if (tcparray) {
2548                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2549         }
2550
2551         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2552                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2553                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2554         if (ret != 0) {
2555                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2556                 return -1;
2557         }
2558
2559         talloc_free(data.dptr);
2560
2561         return ret;
2562 }
2563
2564
2565 /*
2566   perform tickle updates if required
2567  */
2568 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2569                                     struct tevent_timer *te,
2570                                     struct timeval t, void *private_data)
2571 {
2572         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2573         int ret;
2574         struct ctdb_vnn *vnn;
2575
2576         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2577                 /* we only send out updates for public addresses that 
2578                    we have taken over
2579                  */
2580                 if (ctdb->pnn != vnn->pnn) {
2581                         continue;
2582                 }
2583                 /* We only send out the updates if we need to */
2584                 if (!vnn->tcp_update_needed) {
2585                         continue;
2586                 }
2587                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2588                                                        &vnn->public_address,
2589                                                        vnn->tcp_array);
2590                 if (ret != 0) {
2591                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2592                                 ctdb_addr_to_str(&vnn->public_address)));
2593                 } else {
2594                         DEBUG(DEBUG_INFO,
2595                               ("Sent tickle update for public address %s\n",
2596                                ctdb_addr_to_str(&vnn->public_address)));
2597                         vnn->tcp_update_needed = false;
2598                 }
2599         }
2600
2601         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2602                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2603                          ctdb_update_tcp_tickles, ctdb);
2604 }
2605
2606 /*
2607   start periodic update of tcp tickles
2608  */
2609 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2610 {
2611         ctdb->tickle_update_context = talloc_new(ctdb);
2612
2613         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2614                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2615                          ctdb_update_tcp_tickles, ctdb);
2616 }
2617
2618
2619
2620
2621 struct control_gratious_arp {
2622         struct ctdb_context *ctdb;
2623         ctdb_sock_addr addr;
2624         const char *iface;
2625         int count;
2626 };
2627
2628 /*
2629   send a control_gratuitous arp
2630  */
2631 static void send_gratious_arp(struct tevent_context *ev,
2632                               struct tevent_timer *te,
2633                               struct timeval t, void *private_data)
2634 {
2635         int ret;
2636         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2637                                                         struct control_gratious_arp);
2638
2639         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2640         if (ret != 0) {
2641                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2642                                  arp->iface, strerror(errno)));
2643         }
2644
2645
2646         arp->count++;
2647         if (arp->count == CTDB_ARP_REPEAT) {
2648                 talloc_free(arp);
2649                 return;
2650         }
2651
2652         tevent_add_timer(arp->ctdb->ev, arp,
2653                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2654                          send_gratious_arp, arp);
2655 }
2656
2657
2658 /*
2659   send a gratious arp 
2660  */
2661 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2662 {
2663         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2664         struct control_gratious_arp *arp;
2665
2666         /* verify the size of indata */
2667         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2668                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2669                                  (unsigned)indata.dsize, 
2670                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2671                 return -1;
2672         }
2673         if (indata.dsize != 
2674                 ( offsetof(struct ctdb_addr_info_old, iface)
2675                 + gratious_arp->len ) ){
2676
2677                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2678                         "but should be %u bytes\n", 
2679                          (unsigned)indata.dsize, 
2680                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2681                 return -1;
2682         }
2683
2684
2685         arp = talloc(ctdb, struct control_gratious_arp);
2686         CTDB_NO_MEMORY(ctdb, arp);
2687
2688         arp->ctdb  = ctdb;
2689         arp->addr   = gratious_arp->addr;
2690         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2691         CTDB_NO_MEMORY(ctdb, arp->iface);
2692         arp->count = 0;
2693
2694         tevent_add_timer(arp->ctdb->ev, arp,
2695                          timeval_zero(), send_gratious_arp, arp);
2696
2697         return 0;
2698 }
2699
2700 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2701 {
2702         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2703         int ret;
2704
2705         /* verify the size of indata */
2706         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2707                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2708                 return -1;
2709         }
2710         if (indata.dsize != 
2711                 ( offsetof(struct ctdb_addr_info_old, iface)
2712                 + pub->len ) ){
2713
2714                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2715                         "but should be %u bytes\n", 
2716                          (unsigned)indata.dsize, 
2717                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2718                 return -1;
2719         }
2720
2721         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2722
2723         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2724
2725         if (ret != 0) {
2726                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2727                 return -1;
2728         }
2729
2730         return 0;
2731 }
2732
2733 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2734 {
2735         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2736         struct ctdb_vnn *vnn;
2737
2738         /* verify the size of indata */
2739         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2740                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2741                 return -1;
2742         }
2743         if (indata.dsize != 
2744                 ( offsetof(struct ctdb_addr_info_old, iface)
2745                 + pub->len ) ){
2746
2747                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2748                         "but should be %u bytes\n", 
2749                          (unsigned)indata.dsize, 
2750                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2751                 return -1;
2752         }
2753
2754         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2755
2756         /* walk over all public addresses until we find a match */
2757         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2758                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2759                         if (vnn->pnn == ctdb->pnn) {
2760                                 /* This IP is currently being hosted.
2761                                  * Defer the deletion until the next
2762                                  * takeover run. "ctdb reloadips" will
2763                                  * always cause a takeover run.  "ctdb
2764                                  * delip" will now need an explicit
2765                                  * "ctdb ipreallocated" afterwards. */
2766                                 vnn->delete_pending = true;
2767                         } else {
2768                                 /* This IP is not hosted on the
2769                                  * current node so just delete it
2770                                  * now. */
2771                                 do_delete_ip(ctdb, vnn);
2772                         }
2773
2774                         return 0;
2775                 }
2776         }
2777
2778         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2779                          ctdb_addr_to_str(&pub->addr)));
2780         return -1;
2781 }
2782
2783
/*
  state carried from ctdb_control_ipreallocated() to
  ctdb_ipreallocated_callback()
 */
struct ipreallocated_callback_state {
        /* the control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
};
2787
2788 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2789                                         int status, void *p)
2790 {
2791         struct ipreallocated_callback_state *state =
2792                 talloc_get_type(p, struct ipreallocated_callback_state);
2793
2794         if (status != 0) {
2795                 DEBUG(DEBUG_ERR,
2796                       (" \"ipreallocated\" event script failed (status %d)\n",
2797                        status));
2798                 if (status == -ETIME) {
2799                         ctdb_ban_self(ctdb);
2800                 }
2801         }
2802
2803         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2804         talloc_free(state);
2805 }
2806
2807 /* A control to run the ipreallocated event */
2808 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2809                                    struct ctdb_req_control_old *c,
2810                                    bool *async_reply)
2811 {
2812         int ret;
2813         struct ipreallocated_callback_state *state;
2814
2815         state = talloc(ctdb, struct ipreallocated_callback_state);
2816         CTDB_NO_MEMORY(ctdb, state);
2817
2818         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2819
2820         ret = ctdb_event_script_callback(ctdb, state,
2821                                          ctdb_ipreallocated_callback, state,
2822                                          CTDB_EVENT_IPREALLOCATED,
2823                                          "%s", "");
2824
2825         if (ret != 0) {
2826                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2827                 talloc_free(state);
2828                 return -1;
2829         }
2830
2831         /* tell the control that we will be reply asynchronously */
2832         state->c    = talloc_steal(state, c);
2833         *async_reply = true;
2834
2835         return 0;
2836 }
2837
2838
/*
  state of one in-flight "reload public ips" request
 */
struct ctdb_reloadips_handle {
        struct ctdb_context *ctdb;       /* owning daemon context */
        struct ctdb_req_control_old *c;  /* control to answer, NULL once answered */
        int status;                      /* status sent back in the reply */
        int fd[2];                       /* pipe: child writes fd[1], parent reads fd[0] */
        pid_t child;                     /* pid of the forked reloadips child */
        struct tevent_fd *fde;           /* fd event watching fd[0] */
};
2847
2848 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2849 {
2850         if (h == h->ctdb->reload_ips) {
2851                 h->ctdb->reload_ips = NULL;
2852         }
2853         if (h->c != NULL) {
2854                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2855                 h->c = NULL;
2856         }
2857         ctdb_kill(h->ctdb, h->child, SIGKILL);
2858         return 0;
2859 }
2860
2861 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2862                                          struct tevent_timer *te,
2863                                          struct timeval t, void *private_data)
2864 {
2865         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2866
2867         talloc_free(h);
2868 }
2869
2870 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2871                                          struct tevent_fd *fde,
2872                                          uint16_t flags, void *private_data)
2873 {
2874         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2875
2876         char res;
2877         int ret;
2878
2879         ret = sys_read(h->fd[0], &res, 1);
2880         if (ret < 1 || res != 0) {
2881                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2882                 res = 1;
2883         }
2884         h->status = res;
2885
2886         talloc_free(h);
2887 }
2888
2889 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2890 {
2891         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2892         struct ctdb_public_ip_list_old *ips;
2893         struct ctdb_vnn *vnn;
2894         struct client_async_data *async_data;
2895         struct timeval timeout;
2896         TDB_DATA data;
2897         struct ctdb_client_control_state *state;
2898         bool first_add;
2899         int i, ret;
2900
2901         CTDB_NO_MEMORY(ctdb, mem_ctx);
2902
2903         /* Read IPs from local node */
2904         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2905                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2906         if (ret != 0) {
2907                 DEBUG(DEBUG_ERR,
2908                       ("Unable to fetch public IPs from local node\n"));
2909                 talloc_free(mem_ctx);
2910                 return -1;
2911         }
2912
2913         /* Read IPs file - this is safe since this is a child process */
2914         ctdb->vnn = NULL;
2915         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2916                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2917                 talloc_free(mem_ctx);
2918                 return -1;
2919         }
2920
2921         async_data = talloc_zero(mem_ctx, struct client_async_data);
2922         CTDB_NO_MEMORY(ctdb, async_data);
2923
2924         /* Compare IPs between node and file for IPs to be deleted */
2925         for (i = 0; i < ips->num; i++) {
2926                 /* */
2927                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2928                         if (ctdb_same_ip(&vnn->public_address,
2929                                          &ips->ips[i].addr)) {
2930                                 /* IP is still in file */
2931                                 break;
2932                         }
2933                 }
2934
2935                 if (vnn == NULL) {
2936                         /* Delete IP ips->ips[i] */
2937                         struct ctdb_addr_info_old *pub;
2938
2939                         DEBUG(DEBUG_NOTICE,
2940                               ("IP %s no longer configured, deleting it\n",
2941                                ctdb_addr_to_str(&ips->ips[i].addr)));
2942
2943                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2944                         CTDB_NO_MEMORY(ctdb, pub);
2945
2946                         pub->addr  = ips->ips[i].addr;
2947                         pub->mask  = 0;
2948                         pub->len   = 0;
2949
2950                         timeout = TAKEOVER_TIMEOUT();
2951
2952                         data.dsize = offsetof(struct ctdb_addr_info_old,
2953                                               iface) + pub->len;
2954                         data.dptr = (uint8_t *)pub;
2955
2956                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2957                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2958                                                   0, data, async_data,
2959                                                   &timeout, NULL);
2960                         if (state == NULL) {
2961                                 DEBUG(DEBUG_ERR,
2962                                       (__location__
2963                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2964                                 goto failed;
2965                         }
2966
2967                         ctdb_client_async_add(async_data, state);
2968                 }
2969         }
2970
2971         /* Compare IPs between node and file for IPs to be added */
2972         first_add = true;
2973         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2974                 for (i = 0; i < ips->num; i++) {
2975                         if (ctdb_same_ip(&vnn->public_address,
2976                                          &ips->ips[i].addr)) {
2977                                 /* IP already on node */
2978                                 break;
2979                         }
2980                 }
2981                 if (i == ips->num) {
2982                         /* Add IP ips->ips[i] */
2983                         struct ctdb_addr_info_old *pub;
2984                         const char *ifaces = NULL;
2985                         uint32_t len;
2986                         int iface = 0;
2987
2988                         DEBUG(DEBUG_NOTICE,
2989                               ("New IP %s configured, adding it\n",
2990                                ctdb_addr_to_str(&vnn->public_address)));
2991                         if (first_add) {
2992                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2993
2994                                 data.dsize = sizeof(pnn);
2995                                 data.dptr  = (uint8_t *)&pnn;
2996
2997                                 ret = ctdb_client_send_message(
2998                                         ctdb,
2999                                         CTDB_BROADCAST_CONNECTED,
3000                                         CTDB_SRVID_REBALANCE_NODE,
3001                                         data);
3002                                 if (ret != 0) {
3003                                         DEBUG(DEBUG_WARNING,
3004                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3005                                 }
3006
3007                                 first_add = false;
3008                         }
3009
3010                         ifaces = vnn->ifaces[0];
3011                         iface = 1;
3012                         while (vnn->ifaces[iface] != NULL) {
3013                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3014                                                          vnn->ifaces[iface]);
3015                                 iface++;
3016                         }
3017
3018                         len   = strlen(ifaces) + 1;
3019                         pub = talloc_zero_size(mem_ctx,
3020                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3021                         CTDB_NO_MEMORY(ctdb, pub);
3022
3023                         pub->addr  = vnn->public_address;
3024                         pub->mask  = vnn->public_netmask_bits;
3025                         pub->len   = len;
3026                         memcpy(&pub->iface[0], ifaces, pub->len);
3027
3028                         timeout = TAKEOVER_TIMEOUT();
3029
3030                         data.dsize = offsetof(struct ctdb_addr_info_old,
3031                                               iface) + pub->len;
3032                         data.dptr = (uint8_t *)pub;
3033
3034                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3035                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3036                                                   0, data, async_data,
3037                                                   &timeout, NULL);
3038                         if (state == NULL) {
3039                                 DEBUG(DEBUG_ERR,
3040                                       (__location__
3041                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3042                                 goto failed;
3043                         }
3044
3045                         ctdb_client_async_add(async_data, state);
3046                 }
3047         }
3048
3049         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3050                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3051                 goto failed;
3052         }
3053
3054         talloc_free(mem_ctx);
3055         return 0;
3056
3057 failed:
3058         talloc_free(mem_ctx);
3059         return -1;
3060 }
3061
3062 /* This control is sent to force the node to re-read the public addresses file
3063    and drop any addresses we should nnot longer host, and add new addresses
3064    that we are now able to host
3065 */
3066 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3067 {
3068         struct ctdb_reloadips_handle *h;
3069         pid_t parent = getpid();
3070
3071         if (ctdb->reload_ips != NULL) {
3072                 talloc_free(ctdb->reload_ips);
3073                 ctdb->reload_ips = NULL;
3074         }
3075
3076         h = talloc(ctdb, struct ctdb_reloadips_handle);
3077         CTDB_NO_MEMORY(ctdb, h);
3078         h->ctdb     = ctdb;
3079         h->c        = NULL;
3080         h->status   = -1;
3081         
3082         if (pipe(h->fd) == -1) {
3083                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3084                 talloc_free(h);
3085                 return -1;
3086         }
3087
3088         h->child = ctdb_fork(ctdb);
3089         if (h->child == (pid_t)-1) {
3090                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3091                 close(h->fd[0]);
3092                 close(h->fd[1]);
3093                 talloc_free(h);
3094                 return -1;
3095         }
3096
3097         /* child process */
3098         if (h->child == 0) {
3099                 signed char res = 0;
3100
3101                 close(h->fd[0]);
3102                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3103
3104                 prctl_set_comment("ctdb_reloadips");
3105                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3106                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3107                         res = -1;
3108                 } else {
3109                         res = ctdb_reloadips_child(ctdb);
3110                         if (res != 0) {
3111                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3112                         }
3113                 }
3114
3115                 sys_write(h->fd[1], &res, 1);
3116                 ctdb_wait_for_process_to_exit(parent);
3117                 _exit(0);
3118         }
3119
3120         h->c             = talloc_steal(h, c);
3121
3122         close(h->fd[1]);
3123         set_close_on_exec(h->fd[0]);
3124
3125         talloc_set_destructor(h, ctdb_reloadips_destructor);
3126
3127
3128         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3129                                ctdb_reloadips_child_handler, (void *)h);
3130         tevent_fd_set_auto_close(h->fde);
3131
3132         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3133                          ctdb_reloadips_timeout_event, h);
3134
3135         /* we reply later */
3136         *async_reply = true;
3137         return 0;
3138 }