ctdb-takeover: Only apply banning credits to the worst offender
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/* Timeout value built from the takeover_timeout tunable.  Expands to an
 * expression that needs a variable named "ctdb" in the caller's scope. */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* First argument handed to timeval_current_ofs() when the ARP timer
 * handler reschedules itself. */
#define CTDB_ARP_INTERVAL 1
/* The ARP timer handler frees its state once it has run this often. */
#define CTDB_ARP_REPEAT   3
50
/* A local interface known to the daemon, kept on the ctdb->ifaces list */
struct ctdb_interface {
	/* list linkage, maintained via the DLIST_* macros */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* interface name, compared with strcmp() on lookup */
	const char *name;
	/* interfaces with link_up == false are skipped when picking one */
	bool link_up;
	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/* State handed to ctdb_do_takeip_callback() */
struct ctdb_do_takeip_state {
	/* the control request that gets the reply */
	struct ctdb_req_control_old *c;
	/* the public address being taken over */
	struct ctdb_vnn *vnn;
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
427         
428                 if (status == -ETIME) {
429                         ctdb_ban_self(ctdb);
430                 }
431                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
432                                  ctdb_addr_to_str(&state->vnn->public_address),
433                                  ctdb_vnn_iface_string(state->vnn)));
434                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
435
436                 node->flags |= NODE_FLAGS_UNHEALTHY;
437                 talloc_free(state);
438                 return;
439         }
440
441         if (ctdb->do_checkpublicip) {
442
443         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
444         if (ret != 0) {
445                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         }
451
452         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
453         data.dsize = strlen((char *)data.dptr) + 1;
454         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
455
456         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
457
458
459         /* the control succeeded */
460         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
461         talloc_free(state);
462         return;
463 }
464
465 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
466 {
467         state->vnn->update_in_flight = false;
468         return 0;
469 }
470
471 /*
472   take over an ip address
473  */
474 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
475                               struct ctdb_req_control_old *c,
476                               struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_takeip_state *state;
480
481         if (vnn->update_in_flight) {
482                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
483                                     "update for this IP already in flight\n",
484                                     ctdb_addr_to_str(&vnn->public_address),
485                                     vnn->public_netmask_bits));
486                 return -1;
487         }
488
489         ret = ctdb_vnn_assign_iface(ctdb, vnn);
490         if (ret != 0) {
491                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
492                                  "assign a usable interface\n",
493                                  ctdb_addr_to_str(&vnn->public_address),
494                                  vnn->public_netmask_bits));
495                 return -1;
496         }
497
498         state = talloc(vnn, struct ctdb_do_takeip_state);
499         CTDB_NO_MEMORY(ctdb, state);
500
501         state->c = talloc_steal(ctdb, c);
502         state->vnn   = vnn;
503
504         vnn->update_in_flight = true;
505         talloc_set_destructor(state, ctdb_takeip_destructor);
506
507         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
508                             ctdb_addr_to_str(&vnn->public_address),
509                             vnn->public_netmask_bits,
510                             ctdb_vnn_iface_string(vnn)));
511
512         ret = ctdb_event_script_callback(ctdb,
513                                          state,
514                                          ctdb_do_takeip_callback,
515                                          state,
516                                          CTDB_EVENT_TAKE_IP,
517                                          "%s %s %u",
518                                          ctdb_vnn_iface_string(vnn),
519                                          ctdb_addr_to_str(&vnn->public_address),
520                                          vnn->public_netmask_bits);
521
522         if (ret != 0) {
523                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
524                         ctdb_addr_to_str(&vnn->public_address),
525                         ctdb_vnn_iface_string(vnn)));
526                 talloc_free(state);
527                 return -1;
528         }
529
530         return 0;
531 }
532
/* State handed to ctdb_do_updateip_callback() */
struct ctdb_do_updateip_state {
	/* the control request that gets the reply */
	struct ctdb_req_control_old *c;
	/* interface the address was on before the move */
	struct ctdb_interface *old;
	/* the public address being moved */
	struct ctdb_vnn *vnn;
};
538
539 /*
540   called when updateip event finishes
541  */
542 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
543                                       void *private_data)
544 {
545         struct ctdb_do_updateip_state *state =
546                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547         int32_t ret;
548
549         if (status != 0) {
550                 if (status == -ETIME) {
551                         ctdb_ban_self(ctdb);
552                 }
553                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
554                         ctdb_addr_to_str(&state->vnn->public_address),
555                         state->old->name,
556                         ctdb_vnn_iface_string(state->vnn)));
557
558                 /*
559                  * All we can do is reset the old interface
560                  * and let the next run fix it
561                  */
562                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
563                 state->vnn->iface = state->old;
564                 state->vnn->iface->references++;
565
566                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
567                 talloc_free(state);
568                 return;
569         }
570
571         if (ctdb->do_checkpublicip) {
572
573         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
574         if (ret != 0) {
575                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
576                 talloc_free(state);
577                 return;
578         }
579
580         }
581
582         /* the control succeeded */
583         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
584         talloc_free(state);
585         return;
586 }
587
588 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
589 {
590         state->vnn->update_in_flight = false;
591         return 0;
592 }
593
594 /*
595   update (move) an ip address
596  */
597 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
598                                 struct ctdb_req_control_old *c,
599                                 struct ctdb_vnn *vnn)
600 {
601         int ret;
602         struct ctdb_do_updateip_state *state;
603         struct ctdb_interface *old = vnn->iface;
604         const char *new_name;
605
606         if (vnn->update_in_flight) {
607                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
608                                     "update for this IP already in flight\n",
609                                     ctdb_addr_to_str(&vnn->public_address),
610                                     vnn->public_netmask_bits));
611                 return -1;
612         }
613
614         ctdb_vnn_unassign_iface(ctdb, vnn);
615         ret = ctdb_vnn_assign_iface(ctdb, vnn);
616         if (ret != 0) {
617                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
618                                  "assin a usable interface (old iface '%s')\n",
619                                  ctdb_addr_to_str(&vnn->public_address),
620                                  vnn->public_netmask_bits,
621                                  old->name));
622                 return -1;
623         }
624
625         new_name = ctdb_vnn_iface_string(vnn);
626         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
627                 /* A benign update from one interface onto itself.
628                  * no need to run the eventscripts in this case, just return
629                  * success.
630                  */
631                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632                 return 0;
633         }
634
635         state = talloc(vnn, struct ctdb_do_updateip_state);
636         CTDB_NO_MEMORY(ctdb, state);
637
638         state->c = talloc_steal(ctdb, c);
639         state->old = old;
640         state->vnn = vnn;
641
642         vnn->update_in_flight = true;
643         talloc_set_destructor(state, ctdb_updateip_destructor);
644
645         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
646                             "interface %s to %s\n",
647                             ctdb_addr_to_str(&vnn->public_address),
648                             vnn->public_netmask_bits,
649                             old->name,
650                             new_name));
651
652         ret = ctdb_event_script_callback(ctdb,
653                                          state,
654                                          ctdb_do_updateip_callback,
655                                          state,
656                                          CTDB_EVENT_UPDATE_IP,
657                                          "%s %s %s %u",
658                                          state->old->name,
659                                          new_name,
660                                          ctdb_addr_to_str(&vnn->public_address),
661                                          vnn->public_netmask_bits);
662         if (ret != 0) {
663                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
664                                  ctdb_addr_to_str(&vnn->public_address),
665                                  old->name, new_name));
666                 talloc_free(state);
667                 return -1;
668         }
669
670         return 0;
671 }
672
673 /*
674   Find the vnn of the node that has a public ip address
675   returns -1 if the address is not known as a public address
676  */
677 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
678 {
679         struct ctdb_vnn *vnn;
680
681         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
682                 if (ctdb_same_ip(&vnn->public_address, addr)) {
683                         return vnn;
684                 }
685         }
686
687         return NULL;
688 }
689
690 /*
691   take over an ip address
692  */
693 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
694                                  struct ctdb_req_control_old *c,
695                                  TDB_DATA indata,
696                                  bool *async_reply)
697 {
698         int ret;
699         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
700         struct ctdb_vnn *vnn;
701         bool have_ip = false;
702         bool do_updateip = false;
703         bool do_takeip = false;
704         struct ctdb_interface *best_iface = NULL;
705
706         if (pip->pnn != ctdb->pnn) {
707                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
708                                  "with pnn %d, but we're node %d\n",
709                                  ctdb_addr_to_str(&pip->addr),
710                                  pip->pnn, ctdb->pnn));
711                 return -1;
712         }
713
714         /* update out vnn list */
715         vnn = find_public_ip_vnn(ctdb, &pip->addr);
716         if (vnn == NULL) {
717                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
718                         ctdb_addr_to_str(&pip->addr)));
719                 return 0;
720         }
721
722         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
723                 have_ip = ctdb_sys_have_ip(&pip->addr);
724         }
725         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
726         if (best_iface == NULL) {
727                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
728                                  "a usable interface (old %s, have_ip %d)\n",
729                                  ctdb_addr_to_str(&vnn->public_address),
730                                  vnn->public_netmask_bits,
731                                  ctdb_vnn_iface_string(vnn),
732                                  have_ip));
733                 return -1;
734         }
735
736         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
737                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
738                 have_ip = false;
739         }
740
741
742         if (vnn->iface == NULL && have_ip) {
743                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
744                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
745                                  ctdb_addr_to_str(&vnn->public_address)));
746                 return 0;
747         }
748
749         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
750                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
751                                   "and we have it on iface[%s], but it was assigned to node %d"
752                                   "and we are node %d, banning ourself\n",
753                                  ctdb_addr_to_str(&vnn->public_address),
754                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
755                 ctdb_ban_self(ctdb);
756                 return -1;
757         }
758
759         if (vnn->pnn == -1 && have_ip) {
760                 vnn->pnn = ctdb->pnn;
761                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762                                   "and we already have it on iface[%s], update local daemon\n",
763                                  ctdb_addr_to_str(&vnn->public_address),
764                                   ctdb_vnn_iface_string(vnn)));
765                 return 0;
766         }
767
768         if (vnn->iface) {
769                 if (vnn->iface != best_iface) {
770                         if (!vnn->iface->link_up) {
771                                 do_updateip = true;
772                         } else if (vnn->iface->references > (best_iface->references + 1)) {
773                                 /* only move when the rebalance gains something */
774                                         do_updateip = true;
775                         }
776                 }
777         }
778
779         if (!have_ip) {
780                 if (do_updateip) {
781                         ctdb_vnn_unassign_iface(ctdb, vnn);
782                         do_updateip = false;
783                 }
784                 do_takeip = true;
785         }
786
787         if (do_takeip) {
788                 ret = ctdb_do_takeip(ctdb, c, vnn);
789                 if (ret != 0) {
790                         return -1;
791                 }
792         } else if (do_updateip) {
793                 ret = ctdb_do_updateip(ctdb, c, vnn);
794                 if (ret != 0) {
795                         return -1;
796                 }
797         } else {
798                 /*
799                  * The interface is up and the kernel known the ip
800                  * => do nothing
801                  */
802                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
803                         ctdb_addr_to_str(&pip->addr),
804                         vnn->public_netmask_bits,
805                         ctdb_vnn_iface_string(vnn)));
806                 return 0;
807         }
808
809         /* tell ctdb_control.c that we will be replying asynchronously */
810         *async_reply = true;
811
812         return 0;
813 }
814
815 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
816 {
817         DLIST_REMOVE(ctdb->vnn, vnn);
818         ctdb_vnn_unassign_iface(ctdb, vnn);
819         ctdb_remove_orphaned_ifaces(ctdb, vnn);
820         talloc_free(vnn);
821 }
822
823 /*
824   called when releaseip event finishes
825  */
826 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
827                                 void *private_data)
828 {
829         struct takeover_callback_state *state = 
830                 talloc_get_type(private_data, struct takeover_callback_state);
831         TDB_DATA data;
832
833         if (status == -ETIME) {
834                 ctdb_ban_self(ctdb);
835         }
836
837         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
838                 if  (ctdb_sys_have_ip(state->addr)) {
839                         DEBUG(DEBUG_ERR,
840                               ("IP %s still hosted during release IP callback, failing\n",
841                                ctdb_addr_to_str(state->addr)));
842                         ctdb_request_control_reply(ctdb, state->c,
843                                                    NULL, -1, NULL);
844                         talloc_free(state);
845                         return;
846                 }
847         }
848
849         /* send a message to all clients of this node telling them
850            that the cluster has been reconfigured and they should
851            release any sockets on this IP */
852         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
853         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
854         data.dsize = strlen((char *)data.dptr)+1;
855
856         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
857
858         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
859
860         ctdb_vnn_unassign_iface(ctdb, state->vnn);
861
862         /* Process the IP if it has been marked for deletion */
863         if (state->vnn->delete_pending) {
864                 do_delete_ip(ctdb, state->vnn);
865                 state->vnn = NULL;
866         }
867
868         /* the control succeeded */
869         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870         talloc_free(state);
871 }
872
873 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
874 {
875         if (state->vnn != NULL) {
876                 state->vnn->update_in_flight = false;
877         }
878         return 0;
879 }
880
881 /*
882   release an ip address
883  */
884 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
885                                 struct ctdb_req_control_old *c,
886                                 TDB_DATA indata, 
887                                 bool *async_reply)
888 {
889         int ret;
890         struct takeover_callback_state *state;
891         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
892         struct ctdb_vnn *vnn;
893         char *iface;
894
895         /* update our vnn list */
896         vnn = find_public_ip_vnn(ctdb, &pip->addr);
897         if (vnn == NULL) {
898                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
899                         ctdb_addr_to_str(&pip->addr)));
900                 return 0;
901         }
902         vnn->pnn = pip->pnn;
903
904         /* stop any previous arps */
905         talloc_free(vnn->takeover_ctx);
906         vnn->takeover_ctx = NULL;
907
908         /* Some ctdb tool commands (e.g. moveip) send
909          * lazy multicast to drop an IP from any node that isn't the
910          * intended new node.  The following causes makes ctdbd ignore
911          * a release for any address it doesn't host.
912          */
913         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
914                 if (!ctdb_sys_have_ip(&pip->addr)) {
915                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
916                                 ctdb_addr_to_str(&pip->addr),
917                                 vnn->public_netmask_bits,
918                                 ctdb_vnn_iface_string(vnn)));
919                         ctdb_vnn_unassign_iface(ctdb, vnn);
920                         return 0;
921                 }
922         } else {
923                 if (vnn->iface == NULL) {
924                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
925                                            ctdb_addr_to_str(&pip->addr),
926                                            vnn->public_netmask_bits));
927                         return 0;
928                 }
929         }
930
931         /* There is a potential race between take_ip and us because we
932          * update the VNN via a callback that run when the
933          * eventscripts have been run.  Avoid the race by allowing one
934          * update to be in flight at a time.
935          */
936         if (vnn->update_in_flight) {
937                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
938                                     "update for this IP already in flight\n",
939                                     ctdb_addr_to_str(&vnn->public_address),
940                                     vnn->public_netmask_bits));
941                 return -1;
942         }
943
944         iface = strdup(ctdb_vnn_iface_string(vnn));
945
946         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
947                 ctdb_addr_to_str(&pip->addr),
948                 vnn->public_netmask_bits,
949                 iface,
950                 pip->pnn));
951
952         state = talloc(ctdb, struct takeover_callback_state);
953         if (state == NULL) {
954                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
955                                __FILE__, __LINE__);
956                 free(iface);
957                 return -1;
958         }
959
960         state->c = talloc_steal(state, c);
961         state->addr = talloc(state, ctdb_sock_addr);       
962         if (state->addr == NULL) {
963                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
964                                __FILE__, __LINE__);
965                 free(iface);
966                 talloc_free(state);
967                 return -1;
968         }
969         *state->addr = pip->addr;
970         state->vnn   = vnn;
971
972         vnn->update_in_flight = true;
973         talloc_set_destructor(state, ctdb_releaseip_destructor);
974
975         ret = ctdb_event_script_callback(ctdb, 
976                                          state, release_ip_callback, state,
977                                          CTDB_EVENT_RELEASE_IP,
978                                          "%s %s %u",
979                                          iface,
980                                          ctdb_addr_to_str(&pip->addr),
981                                          vnn->public_netmask_bits);
982         free(iface);
983         if (ret != 0) {
984                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
985                         ctdb_addr_to_str(&pip->addr),
986                         ctdb_vnn_iface_string(vnn)));
987                 talloc_free(state);
988                 return -1;
989         }
990
991         /* tell the control that we will be reply asynchronously */
992         *async_reply = true;
993         return 0;
994 }
995
996 static int ctdb_add_public_address(struct ctdb_context *ctdb,
997                                    ctdb_sock_addr *addr,
998                                    unsigned mask, const char *ifaces,
999                                    bool check_address)
1000 {
1001         struct ctdb_vnn      *vnn;
1002         uint32_t num = 0;
1003         char *tmp;
1004         const char *iface;
1005         int i;
1006         int ret;
1007
1008         tmp = strdup(ifaces);
1009         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1010                 if (!ctdb_sys_check_iface_exists(iface)) {
1011                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1012                         free(tmp);
1013                         return -1;
1014                 }
1015         }
1016         free(tmp);
1017
1018         /* Verify that we don't have an entry for this ip yet */
1019         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1020                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1021                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1022                                 ctdb_addr_to_str(addr)));
1023                         return -1;
1024                 }               
1025         }
1026
1027         /* create a new vnn structure for this ip address */
1028         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1029         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1030         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1031         tmp = talloc_strdup(vnn, ifaces);
1032         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1033         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1034                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1035                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1036                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1037                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038                 num++;
1039         }
1040         talloc_free(tmp);
1041         vnn->ifaces[num] = NULL;
1042         vnn->public_address      = *addr;
1043         vnn->public_netmask_bits = mask;
1044         vnn->pnn                 = -1;
1045         if (check_address) {
1046                 if (ctdb_sys_have_ip(addr)) {
1047                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1048                         vnn->pnn = ctdb->pnn;
1049                 }
1050         }
1051
1052         for (i=0; vnn->ifaces[i]; i++) {
1053                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1054                 if (ret != 0) {
1055                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1056                                            "for public_address[%s]\n",
1057                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1058                         talloc_free(vnn);
1059                         return -1;
1060                 }
1061         }
1062
1063         DLIST_ADD(ctdb->vnn, vnn);
1064
1065         return 0;
1066 }
1067
1068 /*
1069   setup the public address lists from a file
1070 */
1071 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1072 {
1073         char **lines;
1074         int nlines;
1075         int i;
1076
1077         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1078         if (lines == NULL) {
1079                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1080                 return -1;
1081         }
1082         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083                 nlines--;
1084         }
1085
1086         for (i=0;i<nlines;i++) {
1087                 unsigned mask;
1088                 ctdb_sock_addr addr;
1089                 const char *addrstr;
1090                 const char *ifaces;
1091                 char *tok, *line;
1092
1093                 line = lines[i];
1094                 while ((*line == ' ') || (*line == '\t')) {
1095                         line++;
1096                 }
1097                 if (*line == '#') {
1098                         continue;
1099                 }
1100                 if (strcmp(line, "") == 0) {
1101                         continue;
1102                 }
1103                 tok = strtok(line, " \t");
1104                 addrstr = tok;
1105                 tok = strtok(NULL, " \t");
1106                 if (tok == NULL) {
1107                         if (NULL == ctdb->default_public_interface) {
1108                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1109                                          i+1));
1110                                 talloc_free(lines);
1111                                 return -1;
1112                         }
1113                         ifaces = ctdb->default_public_interface;
1114                 } else {
1115                         ifaces = tok;
1116                 }
1117
1118                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1119                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120                         talloc_free(lines);
1121                         return -1;
1122                 }
1123                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1124                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1125                         talloc_free(lines);
1126                         return -1;
1127                 }
1128         }
1129
1130
1131         talloc_free(lines);
1132         return 0;
1133 }
1134
1135 static void *add_ip_callback(void *parm, void *data)
1136 {
1137         struct public_ip_list *this_ip = parm;
1138         struct public_ip_list *prev_ip = data;
1139
1140         if (prev_ip == NULL) {
1141                 return parm;
1142         }
1143         if (this_ip->pnn == -1) {
1144                 this_ip->pnn = prev_ip->pnn;
1145         }
1146
1147         return parm;
1148 }
1149
1150 static int getips_count_callback(void *param, void *data)
1151 {
1152         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1153         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1154
1155         new_ip->next = *ip_list;
1156         *ip_list     = new_ip;
1157         return 0;
1158 }
1159
1160 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1161                                        struct ctdb_public_ip_list *ips,
1162                                        uint32_t pnn);
1163
1164 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1165                                          struct ipalloc_state *ipalloc_state,
1166                                          struct ctdb_node_map_old *nodemap)
1167 {
1168         int j;
1169         int ret;
1170         struct ctdb_public_ip_list_old *ip_list;
1171
1172         if (ipalloc_state->num != nodemap->num) {
1173                 DEBUG(DEBUG_ERR,
1174                       (__location__
1175                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1176                        ipalloc_state->num, nodemap->num));
1177                 return -1;
1178         }
1179
1180         for (j=0; j<nodemap->num; j++) {
1181                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1182                         continue;
1183                 }
1184
1185                 /* Retrieve the list of known public IPs from the node */
1186                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1187                                         TAKEOVER_TIMEOUT(),
1188                                         j,
1189                                         ipalloc_state->known_public_ips,
1190                                         0,
1191                                         &ip_list);
1192                 if (ret != 0) {
1193                         DEBUG(DEBUG_ERR,
1194                               ("Failed to read known public IPs from node: %u\n",
1195                                j));
1196                         return -1;
1197                 }
1198                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1199                 /* This could be copied and freed.  However, ip_list
1200                  * is allocated off ipalloc_state->known_public_ips,
1201                  * so this is a safe hack.  This will go away in a
1202                  * while anyway... */
1203                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1204
1205                 if (ctdb->do_checkpublicip) {
1206                         verify_remote_ip_allocation(
1207                                 ctdb,
1208                                 &ipalloc_state->known_public_ips[j],
1209                                 j);
1210                 }
1211
1212                 /* Retrieve the list of available public IPs from the node */
1213                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1214                                         TAKEOVER_TIMEOUT(),
1215                                         j,
1216                                         ipalloc_state->available_public_ips,
1217                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1218                                         &ip_list);
1219                 if (ret != 0) {
1220                         DEBUG(DEBUG_ERR,
1221                               ("Failed to read available public IPs from node: %u\n",
1222                                j));
1223                         return -1;
1224                 }
1225                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1226                 /* This could be copied and freed.  However, ip_list
1227                  * is allocated off ipalloc_state->available_public_ips,
1228                  * so this is a safe hack.  This will go away in a
1229                  * while anyway... */
1230                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1231         }
1232
1233         return 0;
1234 }
1235
1236 static struct public_ip_list *
1237 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1238 {
1239         int i, j;
1240         struct public_ip_list *ip_list;
1241         struct ctdb_public_ip_list *public_ips;
1242
1243         TALLOC_FREE(ctdb->ip_tree);
1244         ctdb->ip_tree = trbt_create(ctdb, 0);
1245
1246         for (i=0; i < ctdb->num_nodes; i++) {
1247
1248                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1249                         continue;
1250                 }
1251
1252                 /* there were no public ips for this node */
1253                 if (ipalloc_state->known_public_ips == NULL) {
1254                         continue;
1255                 }
1256
1257                 public_ips = &ipalloc_state->known_public_ips[i];
1258
1259                 for (j=0; j < public_ips->num; j++) {
1260                         struct public_ip_list *tmp_ip;
1261
1262                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1263                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1264                         /* Do not use information about IP addresses hosted
1265                          * on other nodes, it may not be accurate */
1266                         if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1267                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1268                         } else {
1269                                 tmp_ip->pnn = -1;
1270                         }
1271                         tmp_ip->addr = public_ips->ip[j].addr;
1272                         tmp_ip->next = NULL;
1273
1274                         trbt_insertarray32_callback(ctdb->ip_tree,
1275                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1276                                 add_ip_callback,
1277                                 tmp_ip);
1278                 }
1279         }
1280
1281         ip_list = NULL;
1282         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1283
1284         return ip_list;
1285 }
1286
1287 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1288 {
1289         int i;
1290
1291         for (i=0;i<nodemap->num;i++) {
1292                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1293                         /* Found one completely healthy node */
1294                         return false;
1295                 }
1296         }
1297
1298         return true;
1299 }
1300
1301 struct get_tunable_callback_data {
1302         const char *tunable;
1303         uint32_t *out;
1304         bool fatal;
1305 };
1306
1307 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1308                                  int32_t res, TDB_DATA outdata,
1309                                  void *callback)
1310 {
1311         struct get_tunable_callback_data *cd =
1312                 (struct get_tunable_callback_data *)callback;
1313         int size;
1314
1315         if (res != 0) {
1316                 /* Already handled in fail callback */
1317                 return;
1318         }
1319
1320         if (outdata.dsize != sizeof(uint32_t)) {
1321                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1322                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1323                                  (int)outdata.dsize));
1324                 cd->fatal = true;
1325                 return;
1326         }
1327
1328         size = talloc_array_length(cd->out);
1329         if (pnn >= size) {
1330                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1331                                  cd->tunable, pnn, size));
1332                 return;
1333         }
1334
1335                 
1336         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1337 }
1338
1339 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1340                                        int32_t res, TDB_DATA outdata,
1341                                        void *callback)
1342 {
1343         struct get_tunable_callback_data *cd =
1344                 (struct get_tunable_callback_data *)callback;
1345
1346         switch (res) {
1347         case -ETIME:
1348                 DEBUG(DEBUG_ERR,
1349                       ("Timed out getting tunable \"%s\" from node %d\n",
1350                        cd->tunable, pnn));
1351                 cd->fatal = true;
1352                 break;
1353         case -EINVAL:
1354         case -1:
1355                 DEBUG(DEBUG_WARNING,
1356                       ("Tunable \"%s\" not implemented on node %d\n",
1357                        cd->tunable, pnn));
1358                 break;
1359         default:
1360                 DEBUG(DEBUG_ERR,
1361                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1362                        cd->tunable, pnn));
1363                 cd->fatal = true;
1364         }
1365 }
1366
1367 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1368                                         TALLOC_CTX *tmp_ctx,
1369                                         struct ctdb_node_map_old *nodemap,
1370                                         const char *tunable,
1371                                         uint32_t default_value)
1372 {
1373         TDB_DATA data;
1374         struct ctdb_control_get_tunable *t;
1375         uint32_t *nodes;
1376         uint32_t *tvals;
1377         struct get_tunable_callback_data callback_data;
1378         int i;
1379
1380         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1381         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1382         for (i=0; i<nodemap->num; i++) {
1383                 tvals[i] = default_value;
1384         }
1385                 
1386         callback_data.out = tvals;
1387         callback_data.tunable = tunable;
1388         callback_data.fatal = false;
1389
1390         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1391         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1392         t = (struct ctdb_control_get_tunable *)data.dptr;
1393         t->length = strlen(tunable)+1;
1394         memcpy(t->name, tunable, t->length);
1395         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1396         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1397                                       nodes, 0, TAKEOVER_TIMEOUT(),
1398                                       false, data,
1399                                       get_tunable_callback,
1400                                       get_tunable_fail_callback,
1401                                       &callback_data) != 0) {
1402                 if (callback_data.fatal) {
1403                         talloc_free(tvals);
1404                         tvals = NULL;
1405                 }
1406         }
1407         talloc_free(nodes);
1408         talloc_free(data.dptr);
1409
1410         return tvals;
1411 }
1412
1413 /* Set internal flags for IP allocation:
1414  *   Clear ip flags
1415  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1416  *   Set NOIPHOST ip flag for each INACTIVE node
1417  *   if all nodes are disabled:
1418  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1419  *   else
1420  *     Set NOIPHOST ip flags for disabled nodes
1421  */
1422 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1423                                  struct ctdb_node_map_old *nodemap,
1424                                  uint32_t *tval_noiptakeover,
1425                                  uint32_t *tval_noiphostonalldisabled)
1426 {
1427         int i;
1428
1429         for (i=0;i<nodemap->num;i++) {
1430                 /* Can not take IPs on node with NoIPTakeover set */
1431                 if (tval_noiptakeover[i] != 0) {
1432                         ipalloc_state->noiptakeover[i] = true;
1433                 }
1434
1435                 /* Can not host IPs on INACTIVE node */
1436                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1437                         ipalloc_state->noiphost[i] = true;
1438                 }
1439         }
1440
1441         if (all_nodes_are_disabled(nodemap)) {
1442                 /* If all nodes are disabled, can not host IPs on node
1443                  * with NoIPHostOnAllDisabled set
1444                  */
1445                 for (i=0;i<nodemap->num;i++) {
1446                         if (tval_noiphostonalldisabled[i] != 0) {
1447                                 ipalloc_state->noiphost[i] = true;
1448                         }
1449                 }
1450         } else {
1451                 /* If some nodes are not disabled, then can not host
1452                  * IPs on DISABLED node
1453                  */
1454                 for (i=0;i<nodemap->num;i++) {
1455                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1456                                 ipalloc_state->noiphost[i] = true;
1457                         }
1458                 }
1459         }
1460 }
1461
1462 static bool set_ipflags(struct ctdb_context *ctdb,
1463                         struct ipalloc_state *ipalloc_state,
1464                         struct ctdb_node_map_old *nodemap)
1465 {
1466         uint32_t *tval_noiptakeover;
1467         uint32_t *tval_noiphostonalldisabled;
1468
1469         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1470                                                    "NoIPTakeover", 0);
1471         if (tval_noiptakeover == NULL) {
1472                 return false;
1473         }
1474
1475         tval_noiphostonalldisabled =
1476                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1477                                        "NoIPHostOnAllDisabled", 0);
1478         if (tval_noiphostonalldisabled == NULL) {
1479                 /* Caller frees tmp_ctx */
1480                 return false;
1481         }
1482
1483         set_ipflags_internal(ipalloc_state, nodemap,
1484                              tval_noiptakeover,
1485                              tval_noiphostonalldisabled);
1486
1487         talloc_free(tval_noiptakeover);
1488         talloc_free(tval_noiphostonalldisabled);
1489
1490         return true;
1491 }
1492
1493 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1494                                                  TALLOC_CTX *mem_ctx)
1495 {
1496         struct ipalloc_state *ipalloc_state =
1497                 talloc_zero(mem_ctx, struct ipalloc_state);
1498         if (ipalloc_state == NULL) {
1499                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1500                 return NULL;
1501         }
1502
1503         ipalloc_state->num = ctdb->num_nodes;
1504
1505         ipalloc_state->known_public_ips =
1506                 talloc_zero_array(ipalloc_state,
1507                                   struct ctdb_public_ip_list,
1508                                   ipalloc_state->num);
1509         if (ipalloc_state->known_public_ips == NULL) {
1510                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1511                 goto fail;
1512         }
1513
1514         ipalloc_state->available_public_ips =
1515                 talloc_zero_array(ipalloc_state,
1516                                   struct ctdb_public_ip_list,
1517                                   ipalloc_state->num);
1518         if (ipalloc_state->available_public_ips == NULL) {
1519                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1520                 goto fail;
1521         }
1522         ipalloc_state->noiptakeover =
1523                 talloc_zero_array(ipalloc_state,
1524                                   bool,
1525                                   ipalloc_state->num);
1526         if (ipalloc_state->noiptakeover == NULL) {
1527                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1528                 goto fail;
1529         }
1530         ipalloc_state->noiphost =
1531                 talloc_zero_array(ipalloc_state,
1532                                   bool,
1533                                   ipalloc_state->num);
1534         if (ipalloc_state->noiphost == NULL) {
1535                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1536                 goto fail;
1537         }
1538
1539         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1540                 ipalloc_state->algorithm = IPALLOC_LCP2;
1541         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1542                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1543         } else {
1544                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1545         }
1546
1547         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1548
1549         return ipalloc_state;
1550 fail:
1551         talloc_free(ipalloc_state);
1552         return NULL;
1553 }
1554
1555 struct takeover_callback_data {
1556         uint32_t num_nodes;
1557         unsigned int *fail_count;
1558 };
1559
1560 static struct takeover_callback_data *
1561 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1562                             uint32_t num_nodes)
1563 {
1564         static struct takeover_callback_data *takeover_data;
1565
1566         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1567         if (takeover_data == NULL) {
1568                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1569                 return NULL;
1570         }
1571
1572         takeover_data->fail_count = talloc_zero_array(takeover_data,
1573                                                       unsigned int, num_nodes);
1574         if (takeover_data->fail_count == NULL) {
1575                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1576                 talloc_free(takeover_data);
1577                 return NULL;
1578         }
1579
1580         takeover_data->num_nodes = num_nodes;
1581
1582         return takeover_data;
1583 }
1584
1585 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1586                                        uint32_t node_pnn, int32_t res,
1587                                        TDB_DATA outdata, void *callback_data)
1588 {
1589         struct takeover_callback_data *cd =
1590                 talloc_get_type_abort(callback_data,
1591                                       struct takeover_callback_data);
1592
1593         if (node_pnn >= cd->num_nodes) {
1594                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1595                 return;
1596         }
1597
1598         if (cd->fail_count[node_pnn] == 0) {
1599                 DEBUG(DEBUG_ERR,
1600                       ("Node %u failed the takeover run\n", node_pnn));
1601         }
1602
1603         cd->fail_count[node_pnn]++;
1604 }
1605
1606 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1607                                           struct takeover_callback_data *tcd)
1608 {
1609         unsigned int max_fails = 0;
1610         uint32_t max_pnn = -1;
1611         uint32_t i;
1612
1613         for (i = 0; i < tcd->num_nodes; i++) {
1614                 if (tcd->fail_count[i] > max_fails) {
1615                         max_pnn = i;
1616                         max_fails = tcd->fail_count[i];
1617                 }
1618         }
1619
1620         if (max_fails > 0) {
1621                 int ret;
1622                 TDB_DATA data;
1623
1624                 DEBUG(DEBUG_ERR,
1625                       ("Sending banning credits to %u with fail count %u\n",
1626                        max_pnn, max_fails));
1627
1628                 data.dptr = (uint8_t *)&max_pnn;
1629                 data.dsize = sizeof(uint32_t);
1630                 ret = ctdb_client_send_message(ctdb,
1631                                                CTDB_BROADCAST_CONNECTED,
1632                                                CTDB_SRVID_BANNING,
1633                                                data);
1634                 if (ret != 0) {
1635                         DEBUG(DEBUG_ERR,
1636                               ("Failed to set banning credits for node %u\n",
1637                                max_pnn));
1638                 }
1639         }
1640 }
1641
1642 /*
1643  * Recalculate the allocation of public IPs to nodes and have the
1644  * nodes host their allocated addresses.
1645  *
1646  * - Allocate memory for IP allocation state, including per node
1647  *   arrays
1648  * - Populate IP allocation algorithm in IP allocation state
1649  * - Populate local value of tunable NoIPFailback in IP allocation
1650      state - this is really a cluster-wide configuration variable and
1651      only the value form the master node is used
1652  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1653  *   connected nodes - this is done separately so tunable values can
1654  *   be faked in unit testing
1655  * - Populate NoIPTakover tunable in IP allocation state
1656  * - Populate NoIPHost in IP allocation state, derived from node flags
1657  *   and NoIPHostOnAllDisabled tunable
1658  * - Retrieve and populate known and available IP lists in IP
1659  *   allocation state
1660  * - If no available IP addresses then early exit
1661  * - Build list of (known IPs, currently assigned node)
1662  * - Populate list of nodes to force rebalance - internal structure,
1663  *   currently no way to fetch, only used by LCP2 for nodes that have
1664  *   had new IP addresses added
1665  * - Run IP allocation algorithm
1666  * - Send RELEASE_IP to all nodes for IPs they should not host
1667  * - Send TAKE_IP to all nodes for IPs they should host
1668  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1669  */
1670 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1671                       uint32_t *force_rebalance_nodes,
1672                       client_async_callback fail_callback, void *callback_data)
1673 {
1674         int i, ret;
1675         struct ctdb_public_ip ip;
1676         uint32_t *nodes;
1677         struct public_ip_list *all_ips, *tmp_ip;
1678         TDB_DATA data;
1679         struct timeval timeout;
1680         struct client_async_data *async_data;
1681         struct ctdb_client_control_state *state;
1682         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1683         struct ipalloc_state *ipalloc_state;
1684         struct takeover_callback_data *takeover_data;
1685         bool can_host_ips;
1686
1687         /* Initialise fail callback data to be used with
1688          * takeover_run_fail_callback().  A failure in any of the
1689          * following steps will cause an early return, so this can be
1690          * reused for each of those steps without re-initialising. */
1691         takeover_data = takeover_callback_data_init(tmp_ctx,
1692                                                     nodemap->num);
1693         if (takeover_data == NULL) {
1694                 talloc_free(tmp_ctx);
1695                 return -1;
1696         }
1697
1698         /*
1699          * ip failover is completely disabled, just send out the 
1700          * ipreallocated event.
1701          */
1702         if (ctdb->tunable.disable_ip_failover != 0) {
1703                 goto ipreallocated;
1704         }
1705
1706         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1707         if (ipalloc_state == NULL) {
1708                 talloc_free(tmp_ctx);
1709                 return -1;
1710         }
1711
1712         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1713                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1714                 talloc_free(tmp_ctx);
1715                 return -1;
1716         }
1717
1718         /* Fetch known/available public IPs from each active node */
1719         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1720         if (ret != 0) {
1721                 talloc_free(tmp_ctx);
1722                 return -1;
1723         }
1724
1725         /* Short-circuit IP allocation if no node has available IPs */
1726         can_host_ips = false;
1727         for (i=0; i < ipalloc_state->num; i++) {
1728                 if (ipalloc_state->available_public_ips[i].num != 0) {
1729                         can_host_ips = true;
1730                 }
1731         }
1732         if (!can_host_ips) {
1733                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1734                 goto ipreallocated;
1735         }
1736
1737         /* since nodes only know about those public addresses that
1738            can be served by that particular node, no single node has
1739            a full list of all public addresses that exist in the cluster.
1740            Walk over all node structures and create a merged list of
1741            all public addresses that exist in the cluster.
1742
1743            keep the tree of ips around as ctdb->ip_tree
1744         */
1745         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1746         ipalloc_state->all_ips = all_ips;
1747
1748         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1749
1750         /* Do the IP reassignment calculations */
1751         ipalloc(ipalloc_state);
1752
1753         /* Now tell all nodes to release any public IPs should not
1754          * host.  This will be a NOOP on nodes that don't currently
1755          * hold the given IP.
1756          */
1757         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1758         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1759
1760         async_data->fail_callback = takeover_run_fail_callback;
1761         async_data->callback_data = takeover_data;
1762
1763         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1764
1765         /* Send a RELEASE_IP to all nodes that should not be hosting
1766          * each IP.  For each IP, all but one of these will be
1767          * redundant.  However, the redundant ones are used to tell
1768          * nodes which node should be hosting the IP so that commands
1769          * like "ctdb ip" can display a particular nodes idea of who
1770          * is hosting what. */
1771         for (i=0;i<nodemap->num;i++) {
1772                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1773                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1774                         continue;
1775                 }
1776
1777                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1778                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1779                                 /* This node should be serving this
1780                                    vnn so don't tell it to release the ip
1781                                 */
1782                                 continue;
1783                         }
1784                         ip.pnn  = tmp_ip->pnn;
1785                         ip.addr = tmp_ip->addr;
1786
1787                         timeout = TAKEOVER_TIMEOUT();
1788                         data.dsize = sizeof(ip);
1789                         data.dptr  = (uint8_t *)&ip;
1790                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1791                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1792                                                   data, async_data,
1793                                                   &timeout, NULL);
1794                         if (state == NULL) {
1795                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1796                                 talloc_free(tmp_ctx);
1797                                 return -1;
1798                         }
1799
1800                         ctdb_client_async_add(async_data, state);
1801                 }
1802         }
1803         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1804                 DEBUG(DEBUG_ERR,
1805                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1806                 goto fail;
1807         }
1808         talloc_free(async_data);
1809
1810
1811         /* For each IP, send a TAKOVER_IP to the node that should be
1812          * hosting it.  Many of these will often be redundant (since
1813          * the allocation won't have changed) but they can be useful
1814          * to recover from inconsistencies. */
1815         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1816         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1817
1818         async_data->fail_callback = takeover_run_fail_callback;
1819         async_data->callback_data = takeover_data;
1820
1821         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1822                 if (tmp_ip->pnn == -1) {
1823                         /* this IP won't be taken over */
1824                         continue;
1825                 }
1826
1827                 ip.pnn  = tmp_ip->pnn;
1828                 ip.addr = tmp_ip->addr;
1829
1830                 timeout = TAKEOVER_TIMEOUT();
1831                 data.dsize = sizeof(ip);
1832                 data.dptr  = (uint8_t *)&ip;
1833                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1834                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1835                                           data, async_data, &timeout, NULL);
1836                 if (state == NULL) {
1837                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1838                         talloc_free(tmp_ctx);
1839                         return -1;
1840                 }
1841
1842                 ctdb_client_async_add(async_data, state);
1843         }
1844         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1845                 DEBUG(DEBUG_ERR,
1846                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1847                 goto fail;
1848         }
1849
1850 ipreallocated:
1851         /*
1852          * Tell all nodes to run eventscripts to process the
1853          * "ipreallocated" event.  This can do a lot of things,
1854          * including restarting services to reconfigure them if public
1855          * IPs have moved.  Once upon a time this event only used to
1856          * update natgw.
1857          */
1858         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1859         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1860                                         nodes, 0, TAKEOVER_TIMEOUT(),
1861                                         false, tdb_null,
1862                                         NULL, takeover_run_fail_callback,
1863                                         takeover_data);
1864         if (ret != 0) {
1865                 DEBUG(DEBUG_ERR,
1866                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1867                 goto fail;
1868         }
1869
1870         talloc_free(tmp_ctx);
1871         return ret;
1872
1873 fail:
1874         takeover_run_process_failures(ctdb, takeover_data);
1875         talloc_free(tmp_ctx);
1876         return -1;
1877 }
1878
1879
1880 /*
1881   destroy a ctdb_client_ip structure
1882  */
1883 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1884 {
1885         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1886                 ctdb_addr_to_str(&ip->addr),
1887                 ntohs(ip->addr.ip.sin_port),
1888                 ip->client_id));
1889
1890         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1891         return 0;
1892 }
1893
1894 /*
1895   called by a client to inform us of a TCP connection that it is managing
1896   that should tickled with an ACK when IP takeover is done
1897  */
1898 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1899                                 TDB_DATA indata)
1900 {
1901         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1902         struct ctdb_connection *tcp_sock = NULL;
1903         struct ctdb_tcp_list *tcp;
1904         struct ctdb_connection t;
1905         int ret;
1906         TDB_DATA data;
1907         struct ctdb_client_ip *ip;
1908         struct ctdb_vnn *vnn;
1909         ctdb_sock_addr addr;
1910
1911         /* If we don't have public IPs, tickles are useless */
1912         if (ctdb->vnn == NULL) {
1913                 return 0;
1914         }
1915
1916         tcp_sock = (struct ctdb_connection *)indata.dptr;
1917
1918         addr = tcp_sock->src;
1919         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1920         addr = tcp_sock->dst;
1921         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1922
1923         ZERO_STRUCT(addr);
1924         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1925         vnn = find_public_ip_vnn(ctdb, &addr);
1926         if (vnn == NULL) {
1927                 switch (addr.sa.sa_family) {
1928                 case AF_INET:
1929                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1930                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1931                                         ctdb_addr_to_str(&addr)));
1932                         }
1933                         break;
1934                 case AF_INET6:
1935                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1936                                 ctdb_addr_to_str(&addr)));
1937                         break;
1938                 default:
1939                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1940                 }
1941
1942                 return 0;
1943         }
1944
1945         if (vnn->pnn != ctdb->pnn) {
1946                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1947                         ctdb_addr_to_str(&addr),
1948                         client_id, client->pid));
1949                 /* failing this call will tell smbd to die */
1950                 return -1;
1951         }
1952
1953         ip = talloc(client, struct ctdb_client_ip);
1954         CTDB_NO_MEMORY(ctdb, ip);
1955
1956         ip->ctdb      = ctdb;
1957         ip->addr      = addr;
1958         ip->client_id = client_id;
1959         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1960         DLIST_ADD(ctdb->client_ip_list, ip);
1961
1962         tcp = talloc(client, struct ctdb_tcp_list);
1963         CTDB_NO_MEMORY(ctdb, tcp);
1964
1965         tcp->connection.src = tcp_sock->src;
1966         tcp->connection.dst = tcp_sock->dst;
1967
1968         DLIST_ADD(client->tcp_list, tcp);
1969
1970         t.src = tcp_sock->src;
1971         t.dst = tcp_sock->dst;
1972
1973         data.dptr = (uint8_t *)&t;
1974         data.dsize = sizeof(t);
1975
1976         switch (addr.sa.sa_family) {
1977         case AF_INET:
1978                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1979                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1980                         ctdb_addr_to_str(&tcp_sock->src),
1981                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1982                 break;
1983         case AF_INET6:
1984                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1985                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1986                         ctdb_addr_to_str(&tcp_sock->src),
1987                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1988                 break;
1989         default:
1990                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1991         }
1992
1993
1994         /* tell all nodes about this tcp connection */
1995         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1996                                        CTDB_CONTROL_TCP_ADD,
1997                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1998         if (ret != 0) {
1999                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
2000                 return -1;
2001         }
2002
2003         return 0;
2004 }
2005
2006 /*
2007   find a tcp address on a list
2008  */
2009 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2010                                            struct ctdb_connection *tcp)
2011 {
2012         int i;
2013
2014         if (array == NULL) {
2015                 return NULL;
2016         }
2017
2018         for (i=0;i<array->num;i++) {
2019                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2020                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2021                         return &array->connections[i];
2022                 }
2023         }
2024         return NULL;
2025 }
2026
2027
2028
2029 /*
2030   called by a daemon to inform us of a TCP connection that one of its
2031   clients managing that should tickled with an ACK when IP takeover is
2032   done
2033  */
2034 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2035 {
2036         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2037         struct ctdb_tcp_array *tcparray;
2038         struct ctdb_connection tcp;
2039         struct ctdb_vnn *vnn;
2040
2041         /* If we don't have public IPs, tickles are useless */
2042         if (ctdb->vnn == NULL) {
2043                 return 0;
2044         }
2045
2046         vnn = find_public_ip_vnn(ctdb, &p->dst);
2047         if (vnn == NULL) {
2048                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2049                         ctdb_addr_to_str(&p->dst)));
2050
2051                 return -1;
2052         }
2053
2054
2055         tcparray = vnn->tcp_array;
2056
2057         /* If this is the first tickle */
2058         if (tcparray == NULL) {
2059                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2060                 CTDB_NO_MEMORY(ctdb, tcparray);
2061                 vnn->tcp_array = tcparray;
2062
2063                 tcparray->num = 0;
2064                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2065                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2066
2067                 tcparray->connections[tcparray->num].src = p->src;
2068                 tcparray->connections[tcparray->num].dst = p->dst;
2069                 tcparray->num++;
2070
2071                 if (tcp_update_needed) {
2072                         vnn->tcp_update_needed = true;
2073                 }
2074                 return 0;
2075         }
2076
2077
2078         /* Do we already have this tickle ?*/
2079         tcp.src = p->src;
2080         tcp.dst = p->dst;
2081         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2082                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2083                         ctdb_addr_to_str(&tcp.dst),
2084                         ntohs(tcp.dst.ip.sin_port),
2085                         vnn->pnn));
2086                 return 0;
2087         }
2088
2089         /* A new tickle, we must add it to the array */
2090         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2091                                         struct ctdb_connection,
2092                                         tcparray->num+1);
2093         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2094
2095         tcparray->connections[tcparray->num].src = p->src;
2096         tcparray->connections[tcparray->num].dst = p->dst;
2097         tcparray->num++;
2098
2099         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2100                 ctdb_addr_to_str(&tcp.dst),
2101                 ntohs(tcp.dst.ip.sin_port),
2102                 vnn->pnn));
2103
2104         if (tcp_update_needed) {
2105                 vnn->tcp_update_needed = true;
2106         }
2107
2108         return 0;
2109 }
2110
2111
2112 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2113 {
2114         struct ctdb_connection *tcpp;
2115
2116         if (vnn == NULL) {
2117                 return;
2118         }
2119
2120         /* if the array is empty we cant remove it
2121            and we don't need to do anything
2122          */
2123         if (vnn->tcp_array == NULL) {
2124                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2125                         ctdb_addr_to_str(&conn->dst),
2126                         ntohs(conn->dst.ip.sin_port)));
2127                 return;
2128         }
2129
2130
2131         /* See if we know this connection
2132            if we don't know this connection  then we dont need to do anything
2133          */
2134         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2135         if (tcpp == NULL) {
2136                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2137                         ctdb_addr_to_str(&conn->dst),
2138                         ntohs(conn->dst.ip.sin_port)));
2139                 return;
2140         }
2141
2142
2143         /* We need to remove this entry from the array.
2144            Instead of allocating a new array and copying data to it
2145            we cheat and just copy the last entry in the existing array
2146            to the entry that is to be removed and just shring the 
2147            ->num field
2148          */
2149         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2150         vnn->tcp_array->num--;
2151
2152         /* If we deleted the last entry we also need to remove the entire array
2153          */
2154         if (vnn->tcp_array->num == 0) {
2155                 talloc_free(vnn->tcp_array);
2156                 vnn->tcp_array = NULL;
2157         }               
2158
2159         vnn->tcp_update_needed = true;
2160
2161         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2162                 ctdb_addr_to_str(&conn->src),
2163                 ntohs(conn->src.ip.sin_port)));
2164 }
2165
2166
2167 /*
2168   called by a daemon to inform us of a TCP connection that one of its
2169   clients used are no longer needed in the tickle database
2170  */
2171 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2172 {
2173         struct ctdb_vnn *vnn;
2174         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2175
2176         /* If we don't have public IPs, tickles are useless */
2177         if (ctdb->vnn == NULL) {
2178                 return 0;
2179         }
2180
2181         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2182         if (vnn == NULL) {
2183                 DEBUG(DEBUG_ERR,
2184                       (__location__ " unable to find public address %s\n",
2185                        ctdb_addr_to_str(&conn->dst)));
2186                 return 0;
2187         }
2188
2189         ctdb_remove_connection(vnn, conn);
2190
2191         return 0;
2192 }
2193
2194
2195 /*
2196   Called when another daemon starts - causes all tickles for all
2197   public addresses we are serving to be sent to the new node on the
2198   next check.  This actually causes the next scheduled call to
2199   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2200   doesn't require careful error handling.
2201  */
2202 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2203 {
2204         struct ctdb_vnn *vnn;
2205
2206         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2207                            (unsigned long) pnn));
2208
2209         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2210                 vnn->tcp_update_needed = true;
2211         }
2212
2213         return 0;
2214 }
2215
2216
2217 /*
2218   called when a client structure goes away - hook to remove
2219   elements from the tcp_list in all daemons
2220  */
2221 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2222 {
2223         while (client->tcp_list) {
2224                 struct ctdb_vnn *vnn;
2225                 struct ctdb_tcp_list *tcp = client->tcp_list;
2226                 struct ctdb_connection *conn = &tcp->connection;
2227
2228                 DLIST_REMOVE(client->tcp_list, tcp);
2229
2230                 vnn = find_public_ip_vnn(client->ctdb,
2231                                          &conn->dst);
2232                 if (vnn == NULL) {
2233                         DEBUG(DEBUG_ERR,
2234                               (__location__ " unable to find public address %s\n",
2235                                ctdb_addr_to_str(&conn->dst)));
2236                         continue;
2237                 }
2238
2239                 /* If the IP address is hosted on this node then
2240                  * remove the connection. */
2241                 if (vnn->pnn == client->ctdb->pnn) {
2242                         ctdb_remove_connection(vnn, conn);
2243                 }
2244
2245                 /* Otherwise this function has been called because the
2246                  * server IP address has been released to another node
2247                  * and the client has exited.  This means that we
2248                  * should not delete the connection information.  The
2249                  * takeover node processes connections too. */
2250         }
2251 }
2252
2253
2254 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2255 {
2256         struct ctdb_vnn *vnn;
2257         int count = 0;
2258         TDB_DATA data;
2259
2260         if (ctdb->tunable.disable_ip_failover == 1) {
2261                 return;
2262         }
2263
2264         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2265                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2266                         ctdb_vnn_unassign_iface(ctdb, vnn);
2267                         continue;
2268                 }
2269                 if (!vnn->iface) {
2270                         continue;
2271                 }
2272
2273                 /* Don't allow multiple releases at once.  Some code,
2274                  * particularly ctdb_tickle_sentenced_connections() is
2275                  * not re-entrant */
2276                 if (vnn->update_in_flight) {
2277                         DEBUG(DEBUG_WARNING,
2278                               (__location__
2279                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2280                                     ctdb_addr_to_str(&vnn->public_address),
2281                                     vnn->public_netmask_bits,
2282                                     ctdb_vnn_iface_string(vnn)));
2283                         continue;
2284                 }
2285                 vnn->update_in_flight = true;
2286
2287                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2288                                     ctdb_addr_to_str(&vnn->public_address),
2289                                     vnn->public_netmask_bits,
2290                                     ctdb_vnn_iface_string(vnn)));
2291
2292                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2293                                   ctdb_vnn_iface_string(vnn),
2294                                   ctdb_addr_to_str(&vnn->public_address),
2295                                   vnn->public_netmask_bits);
2296
2297                 data.dptr = (uint8_t *)talloc_strdup(
2298                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2299                 if (data.dptr != NULL) {
2300                         data.dsize = strlen((char *)data.dptr) + 1;
2301                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2302                                                  CTDB_SRVID_RELEASE_IP, data);
2303                         talloc_free(data.dptr);
2304                 }
2305
2306                 ctdb_vnn_unassign_iface(ctdb, vnn);
2307                 vnn->update_in_flight = false;
2308                 count++;
2309         }
2310
2311         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2312 }
2313
2314
2315 /*
2316   get list of public IPs
2317  */
2318 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2319                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2320 {
2321         int i, num, len;
2322         struct ctdb_public_ip_list_old *ips;
2323         struct ctdb_vnn *vnn;
2324         bool only_available = false;
2325
2326         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2327                 only_available = true;
2328         }
2329
2330         /* count how many public ip structures we have */
2331         num = 0;
2332         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2333                 num++;
2334         }
2335
2336         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2337                 num*sizeof(struct ctdb_public_ip);
2338         ips = talloc_zero_size(outdata, len);
2339         CTDB_NO_MEMORY(ctdb, ips);
2340
2341         i = 0;
2342         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2343                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2344                         continue;
2345                 }
2346                 ips->ips[i].pnn  = vnn->pnn;
2347                 ips->ips[i].addr = vnn->public_address;
2348                 i++;
2349         }
2350         ips->num = i;
2351         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2352                 i*sizeof(struct ctdb_public_ip);
2353
2354         outdata->dsize = len;
2355         outdata->dptr  = (uint8_t *)ips;
2356
2357         return 0;
2358 }
2359
2360
2361 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2362                                         struct ctdb_req_control_old *c,
2363                                         TDB_DATA indata,
2364                                         TDB_DATA *outdata)
2365 {
2366         int i, num, len;
2367         ctdb_sock_addr *addr;
2368         struct ctdb_public_ip_info_old *info;
2369         struct ctdb_vnn *vnn;
2370
2371         addr = (ctdb_sock_addr *)indata.dptr;
2372
2373         vnn = find_public_ip_vnn(ctdb, addr);
2374         if (vnn == NULL) {
2375                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2376                                  "'%s'not a public address\n",
2377                                  ctdb_addr_to_str(addr)));
2378                 return -1;
2379         }
2380
2381         /* count how many public ip structures we have */
2382         num = 0;
2383         for (;vnn->ifaces[num];) {
2384                 num++;
2385         }
2386
2387         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2388                 num*sizeof(struct ctdb_iface);
2389         info = talloc_zero_size(outdata, len);
2390         CTDB_NO_MEMORY(ctdb, info);
2391
2392         info->ip.addr = vnn->public_address;
2393         info->ip.pnn = vnn->pnn;
2394         info->active_idx = 0xFFFFFFFF;
2395
2396         for (i=0; vnn->ifaces[i]; i++) {
2397                 struct ctdb_interface *cur;
2398
2399                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2400                 if (cur == NULL) {
2401                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2402                                            vnn->ifaces[i]));
2403                         return -1;
2404                 }
2405                 if (vnn->iface == cur) {
2406                         info->active_idx = i;
2407                 }
2408                 strncpy(info->ifaces[i].name, cur->name,
2409                         sizeof(info->ifaces[i].name));
2410                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2411                 info->ifaces[i].link_state = cur->link_up;
2412                 info->ifaces[i].references = cur->references;
2413         }
2414         info->num = i;
2415         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2416                 i*sizeof(struct ctdb_iface);
2417
2418         outdata->dsize = len;
2419         outdata->dptr  = (uint8_t *)info;
2420
2421         return 0;
2422 }
2423
2424 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2425                                 struct ctdb_req_control_old *c,
2426                                 TDB_DATA *outdata)
2427 {
2428         int i, num, len;
2429         struct ctdb_iface_list_old *ifaces;
2430         struct ctdb_interface *cur;
2431
2432         /* count how many public ip structures we have */
2433         num = 0;
2434         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2435                 num++;
2436         }
2437
2438         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2439                 num*sizeof(struct ctdb_iface);
2440         ifaces = talloc_zero_size(outdata, len);
2441         CTDB_NO_MEMORY(ctdb, ifaces);
2442
2443         i = 0;
2444         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2445                 strncpy(ifaces->ifaces[i].name, cur->name,
2446                         sizeof(ifaces->ifaces[i].name));
2447                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2448                 ifaces->ifaces[i].link_state = cur->link_up;
2449                 ifaces->ifaces[i].references = cur->references;
2450                 i++;
2451         }
2452         ifaces->num = i;
2453         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2454                 i*sizeof(struct ctdb_iface);
2455
2456         outdata->dsize = len;
2457         outdata->dptr  = (uint8_t *)ifaces;
2458
2459         return 0;
2460 }
2461
2462 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2463                                     struct ctdb_req_control_old *c,
2464                                     TDB_DATA indata)
2465 {
2466         struct ctdb_iface *info;
2467         struct ctdb_interface *iface;
2468         bool link_up = false;
2469
2470         info = (struct ctdb_iface *)indata.dptr;
2471
2472         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2473                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2474                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2475                                   len, len, info->name));
2476                 return -1;
2477         }
2478
2479         switch (info->link_state) {
2480         case 0:
2481                 link_up = false;
2482                 break;
2483         case 1:
2484                 link_up = true;
2485                 break;
2486         default:
2487                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2488                                   (unsigned int)info->link_state));
2489                 return -1;
2490         }
2491
2492         if (info->references != 0) {
2493                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2494                                   (unsigned int)info->references));
2495                 return -1;
2496         }
2497
2498         iface = ctdb_find_iface(ctdb, info->name);
2499         if (iface == NULL) {
2500                 return -1;
2501         }
2502
2503         if (link_up == iface->link_up) {
2504                 return 0;
2505         }
2506
2507         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2508               ("iface[%s] has changed it's link status %s => %s\n",
2509                iface->name,
2510                iface->link_up?"up":"down",
2511                link_up?"up":"down"));
2512
2513         iface->link_up = link_up;
2514         return 0;
2515 }
2516
2517
2518 /*
2519   called by a daemon to inform us of the entire list of TCP tickles for
2520   a particular public address.
2521   this control should only be sent by the node that is currently serving
2522   that public address.
2523  */
2524 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2525 {
2526         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2527         struct ctdb_tcp_array *tcparray;
2528         struct ctdb_vnn *vnn;
2529
2530         /* We must at least have tickles.num or else we cant verify the size
2531            of the received data blob
2532          */
2533         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2534                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2535                 return -1;
2536         }
2537
2538         /* verify that the size of data matches what we expect */
2539         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2540                          + sizeof(struct ctdb_connection) * list->num) {
2541                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2542                 return -1;
2543         }
2544
2545         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2546                            ctdb_addr_to_str(&list->addr)));
2547
2548         vnn = find_public_ip_vnn(ctdb, &list->addr);
2549         if (vnn == NULL) {
2550                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2551                         ctdb_addr_to_str(&list->addr)));
2552
2553                 return 1;
2554         }
2555
2556         if (vnn->pnn == ctdb->pnn) {
2557                 DEBUG(DEBUG_INFO,
2558                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2559                        ctdb_addr_to_str(&list->addr)));
2560                 return 0;
2561         }
2562
2563         /* remove any old ticklelist we might have */
2564         talloc_free(vnn->tcp_array);
2565         vnn->tcp_array = NULL;
2566
2567         tcparray = talloc(vnn, struct ctdb_tcp_array);
2568         CTDB_NO_MEMORY(ctdb, tcparray);
2569
2570         tcparray->num = list->num;
2571
2572         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2573         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2574
2575         memcpy(tcparray->connections, &list->connections[0],
2576                sizeof(struct ctdb_connection)*tcparray->num);
2577
2578         /* We now have a new fresh tickle list array for this vnn */
2579         vnn->tcp_array = tcparray;
2580
2581         return 0;
2582 }
2583
2584 /*
2585   called to return the full list of tickles for the puclic address associated 
2586   with the provided vnn
2587  */
2588 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2589 {
2590         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2591         struct ctdb_tickle_list_old *list;
2592         struct ctdb_tcp_array *tcparray;
2593         int num, i;
2594         struct ctdb_vnn *vnn;
2595         unsigned port;
2596
2597         vnn = find_public_ip_vnn(ctdb, addr);
2598         if (vnn == NULL) {
2599                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2600                         ctdb_addr_to_str(addr)));
2601
2602                 return 1;
2603         }
2604
2605         port = ctdb_addr_to_port(addr);
2606
2607         tcparray = vnn->tcp_array;
2608         num = 0;
2609         if (tcparray != NULL) {
2610                 if (port == 0) {
2611                         /* All connections */
2612                         num = tcparray->num;
2613                 } else {
2614                         /* Count connections for port */
2615                         for (i = 0; i < tcparray->num; i++) {
2616                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2617                                         num++;
2618                                 }
2619                         }
2620                 }
2621         }
2622
2623         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2624                         + sizeof(struct ctdb_connection) * num;
2625
2626         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2627         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2628         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2629
2630         list->addr = *addr;
2631         list->num = num;
2632
2633         if (num == 0) {
2634                 return 0;
2635         }
2636
2637         num = 0;
2638         for (i = 0; i < tcparray->num; i++) {
2639                 if (port == 0 || \
2640                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2641                         list->connections[num] = tcparray->connections[i];
2642                         num++;
2643                 }
2644         }
2645
2646         return 0;
2647 }
2648
2649
2650 /*
2651   set the list of all tcp tickles for a public address
2652  */
2653 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2654                                             ctdb_sock_addr *addr,
2655                                             struct ctdb_tcp_array *tcparray)
2656 {
2657         int ret, num;
2658         TDB_DATA data;
2659         struct ctdb_tickle_list_old *list;
2660
2661         if (tcparray) {
2662                 num = tcparray->num;
2663         } else {
2664                 num = 0;
2665         }
2666
2667         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2668                         sizeof(struct ctdb_connection) * num;
2669         data.dptr = talloc_size(ctdb, data.dsize);
2670         CTDB_NO_MEMORY(ctdb, data.dptr);
2671
2672         list = (struct ctdb_tickle_list_old *)data.dptr;
2673         list->addr = *addr;
2674         list->num = num;
2675         if (tcparray) {
2676                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2677         }
2678
2679         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2680                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2681                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2682         if (ret != 0) {
2683                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2684                 return -1;
2685         }
2686
2687         talloc_free(data.dptr);
2688
2689         return ret;
2690 }
2691
2692
2693 /*
2694   perform tickle updates if required
2695  */
2696 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2697                                     struct tevent_timer *te,
2698                                     struct timeval t, void *private_data)
2699 {
2700         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2701         int ret;
2702         struct ctdb_vnn *vnn;
2703
2704         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2705                 /* we only send out updates for public addresses that 
2706                    we have taken over
2707                  */
2708                 if (ctdb->pnn != vnn->pnn) {
2709                         continue;
2710                 }
2711                 /* We only send out the updates if we need to */
2712                 if (!vnn->tcp_update_needed) {
2713                         continue;
2714                 }
2715                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2716                                                        &vnn->public_address,
2717                                                        vnn->tcp_array);
2718                 if (ret != 0) {
2719                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2720                                 ctdb_addr_to_str(&vnn->public_address)));
2721                 } else {
2722                         DEBUG(DEBUG_INFO,
2723                               ("Sent tickle update for public address %s\n",
2724                                ctdb_addr_to_str(&vnn->public_address)));
2725                         vnn->tcp_update_needed = false;
2726                 }
2727         }
2728
2729         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2730                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2731                          ctdb_update_tcp_tickles, ctdb);
2732 }
2733
2734 /*
2735   start periodic update of tcp tickles
2736  */
2737 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2738 {
2739         ctdb->tickle_update_context = talloc_new(ctdb);
2740
2741         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2742                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2743                          ctdb_update_tcp_tickles, ctdb);
2744 }
2745
2746
2747
2748
2749 struct control_gratious_arp {
2750         struct ctdb_context *ctdb;
2751         ctdb_sock_addr addr;
2752         const char *iface;
2753         int count;
2754 };
2755
2756 /*
2757   send a control_gratuitous arp
2758  */
2759 static void send_gratious_arp(struct tevent_context *ev,
2760                               struct tevent_timer *te,
2761                               struct timeval t, void *private_data)
2762 {
2763         int ret;
2764         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2765                                                         struct control_gratious_arp);
2766
2767         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2768         if (ret != 0) {
2769                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2770                                  arp->iface, strerror(errno)));
2771         }
2772
2773
2774         arp->count++;
2775         if (arp->count == CTDB_ARP_REPEAT) {
2776                 talloc_free(arp);
2777                 return;
2778         }
2779
2780         tevent_add_timer(arp->ctdb->ev, arp,
2781                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2782                          send_gratious_arp, arp);
2783 }
2784
2785
2786 /*
2787   send a gratious arp 
2788  */
2789 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2790 {
2791         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2792         struct control_gratious_arp *arp;
2793
2794         /* verify the size of indata */
2795         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2796                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2797                                  (unsigned)indata.dsize, 
2798                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2799                 return -1;
2800         }
2801         if (indata.dsize != 
2802                 ( offsetof(struct ctdb_addr_info_old, iface)
2803                 + gratious_arp->len ) ){
2804
2805                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2806                         "but should be %u bytes\n", 
2807                          (unsigned)indata.dsize, 
2808                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2809                 return -1;
2810         }
2811
2812
2813         arp = talloc(ctdb, struct control_gratious_arp);
2814         CTDB_NO_MEMORY(ctdb, arp);
2815
2816         arp->ctdb  = ctdb;
2817         arp->addr   = gratious_arp->addr;
2818         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2819         CTDB_NO_MEMORY(ctdb, arp->iface);
2820         arp->count = 0;
2821
2822         tevent_add_timer(arp->ctdb->ev, arp,
2823                          timeval_zero(), send_gratious_arp, arp);
2824
2825         return 0;
2826 }
2827
2828 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2829 {
2830         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2831         int ret;
2832
2833         /* verify the size of indata */
2834         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2835                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2836                 return -1;
2837         }
2838         if (indata.dsize != 
2839                 ( offsetof(struct ctdb_addr_info_old, iface)
2840                 + pub->len ) ){
2841
2842                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2843                         "but should be %u bytes\n", 
2844                          (unsigned)indata.dsize, 
2845                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2846                 return -1;
2847         }
2848
2849         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2850
2851         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2852
2853         if (ret != 0) {
2854                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2855                 return -1;
2856         }
2857
2858         return 0;
2859 }
2860
/*
  state carried from a delete-public-address control to the callback
  that runs once the IP release has finished
 */
struct delete_ip_callback_state {
	/* the control request that gets the reply from the callback */
	struct ctdb_req_control_old *c;
};
2864
2865 /*
2866   called when releaseip event finishes for del_public_address
2867  */
2868 static void delete_ip_callback(struct ctdb_context *ctdb,
2869                                int32_t status, TDB_DATA data,
2870                                const char *errormsg,
2871                                void *private_data)
2872 {
2873         struct delete_ip_callback_state *state =
2874                 talloc_get_type(private_data, struct delete_ip_callback_state);
2875
2876         /* If release failed then fail. */
2877         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2878         talloc_free(private_data);
2879 }
2880
2881 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2882                                         struct ctdb_req_control_old *c,
2883                                         TDB_DATA indata, bool *async_reply)
2884 {
2885         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2886         struct ctdb_vnn *vnn;
2887
2888         /* verify the size of indata */
2889         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2890                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2891                 return -1;
2892         }
2893         if (indata.dsize != 
2894                 ( offsetof(struct ctdb_addr_info_old, iface)
2895                 + pub->len ) ){
2896
2897                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2898                         "but should be %u bytes\n", 
2899                          (unsigned)indata.dsize, 
2900                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2901                 return -1;
2902         }
2903
2904         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2905
2906         /* walk over all public addresses until we find a match */
2907         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2908                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2909                         if (vnn->pnn == ctdb->pnn) {
2910                                 struct delete_ip_callback_state *state;
2911                                 struct ctdb_public_ip *ip;
2912                                 TDB_DATA data;
2913                                 int ret;
2914
2915                                 vnn->delete_pending = true;
2916
2917                                 state = talloc(ctdb,
2918                                                struct delete_ip_callback_state);
2919                                 CTDB_NO_MEMORY(ctdb, state);
2920                                 state->c = c;
2921
2922                                 ip = talloc(state, struct ctdb_public_ip);
2923                                 if (ip == NULL) {
2924                                         DEBUG(DEBUG_ERR,
2925                                               (__location__ " Out of memory\n"));
2926                                         talloc_free(state);
2927                                         return -1;
2928                                 }
2929                                 ip->pnn = -1;
2930                                 ip->addr = pub->addr;
2931
2932                                 data.dsize = sizeof(struct ctdb_public_ip);
2933                                 data.dptr = (unsigned char *)ip;
2934
2935                                 ret = ctdb_daemon_send_control(ctdb,
2936                                                                ctdb_get_pnn(ctdb),
2937                                                                0,
2938                                                                CTDB_CONTROL_RELEASE_IP,
2939                                                                0, 0,
2940                                                                data,
2941                                                                delete_ip_callback,
2942                                                                state);
2943                                 if (ret == -1) {
2944                                         DEBUG(DEBUG_ERR,
2945                                               (__location__ "Unable to send "
2946                                                "CTDB_CONTROL_RELEASE_IP\n"));
2947                                         talloc_free(state);
2948                                         return -1;
2949                                 }
2950
2951                                 state->c = talloc_steal(state, c);
2952                                 *async_reply = true;
2953                         } else {
2954                                 /* This IP is not hosted on the
2955                                  * current node so just delete it
2956                                  * now. */
2957                                 do_delete_ip(ctdb, vnn);
2958                         }
2959
2960                         return 0;
2961                 }
2962         }
2963
2964         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2965                          ctdb_addr_to_str(&pub->addr)));
2966         return -1;
2967 }
2968
2969
/*
  state carried from the ipreallocated control to the callback that
  runs when the event has finished
 */
struct ipreallocated_callback_state {
	/* the control request that gets the reply from the callback */
	struct ctdb_req_control_old *c;
};
2973
2974 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2975                                         int status, void *p)
2976 {
2977         struct ipreallocated_callback_state *state =
2978                 talloc_get_type(p, struct ipreallocated_callback_state);
2979
2980         if (status != 0) {
2981                 DEBUG(DEBUG_ERR,
2982                       (" \"ipreallocated\" event script failed (status %d)\n",
2983                        status));
2984                 if (status == -ETIME) {
2985                         ctdb_ban_self(ctdb);
2986                 }
2987         }
2988
2989         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2990         talloc_free(state);
2991 }
2992
2993 /* A control to run the ipreallocated event */
2994 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2995                                    struct ctdb_req_control_old *c,
2996                                    bool *async_reply)
2997 {
2998         int ret;
2999         struct ipreallocated_callback_state *state;
3000
3001         state = talloc(ctdb, struct ipreallocated_callback_state);
3002         CTDB_NO_MEMORY(ctdb, state);
3003
3004         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
3005
3006         ret = ctdb_event_script_callback(ctdb, state,
3007                                          ctdb_ipreallocated_callback, state,
3008                                          CTDB_EVENT_IPREALLOCATED,
3009                                          "%s", "");
3010
3011         if (ret != 0) {
3012                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3013                 talloc_free(state);
3014                 return -1;
3015         }
3016
3017         /* tell the control that we will be reply asynchronously */
3018         state->c    = talloc_steal(state, c);
3019         *async_reply = true;
3020
3021         return 0;
3022 }
3023
3024
3025 /* This function is called from the recovery daemon to verify that a remote
3026    node has the expected ip allocation.
3027    This is verified against ctdb->ip_tree
3028 */
3029 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3030                                        struct ctdb_public_ip_list *ips,
3031                                        uint32_t pnn)
3032 {
3033         struct public_ip_list *tmp_ip;
3034         int i;
3035
3036         if (ctdb->ip_tree == NULL) {
3037                 /* don't know the expected allocation yet, assume remote node
3038                    is correct. */
3039                 return 0;
3040         }
3041
3042         if (ips == NULL) {
3043                 return 0;
3044         }
3045
3046         for (i=0; i<ips->num; i++) {
3047                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3048                 if (tmp_ip == NULL) {
3049                         DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3050                         return -1;
3051                 }
3052
3053                 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3054                         continue;
3055                 }
3056
3057                 if (tmp_ip->pnn != ips->ip[i].pnn) {
3058                         DEBUG(DEBUG_ERR,
3059                               ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3060                                pnn,
3061                                ctdb_addr_to_str(&ips->ip[i].addr),
3062                                ips->ip[i].pnn, tmp_ip->pnn));
3063                         return -1;
3064                 }
3065         }
3066
3067         return 0;
3068 }
3069
3070 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3071 {
3072         struct public_ip_list *tmp_ip;
3073
3074         /* IP tree is never built if DisableIPFailover is set */
3075         if (ctdb->tunable.disable_ip_failover != 0) {
3076                 return 0;
3077         }
3078
3079         if (ctdb->ip_tree == NULL) {
3080                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3081                 return -1;
3082         }
3083
3084         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3085         if (tmp_ip == NULL) {
3086                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3087                 return -1;
3088         }
3089
3090         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3091         tmp_ip->pnn = ip->pnn;
3092
3093         return 0;
3094 }
3095
3096 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3097 {
3098         TALLOC_FREE(ctdb->ip_tree);
3099 }
3100
/*
  state of one reloadips run that is carried out by a child process
 */
struct ctdb_reloadips_handle {
	/* daemon context; its reload_ips pointer may refer to this handle */
	struct ctdb_context *ctdb;
	/* control request answered from the destructor, NULL if none */
	struct ctdb_req_control_old *c;
	/* status that is sent in the reply to c */
	int status;
	/* fd[0] is where the child's result byte is read from;
	 * presumably a pipe shared with the child - confirm at the
	 * place where the handle is set up */
	int fd[2];
	/* pid of the child, killed when the handle is destroyed */
	pid_t child;
	/* NOTE(review): presumably the fd event watching fd[0]; it is
	 * not used by the handlers next to this definition */
	struct tevent_fd *fde;
};
3109
3110 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3111 {
3112         if (h == h->ctdb->reload_ips) {
3113                 h->ctdb->reload_ips = NULL;
3114         }
3115         if (h->c != NULL) {
3116                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3117                 h->c = NULL;
3118         }
3119         ctdb_kill(h->ctdb, h->child, SIGKILL);
3120         return 0;
3121 }
3122
3123 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3124                                          struct tevent_timer *te,
3125                                          struct timeval t, void *private_data)
3126 {
3127         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3128
3129         talloc_free(h);
3130 }
3131
3132 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3133                                          struct tevent_fd *fde,
3134                                          uint16_t flags, void *private_data)
3135 {
3136         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3137
3138         char res;
3139         int ret;
3140
3141         ret = sys_read(h->fd[0], &res, 1);
3142         if (ret < 1 || res != 0) {
3143                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3144                 res = 1;
3145         }
3146         h->status = res;
3147
3148         talloc_free(h);
3149 }
3150
3151 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3152 {
3153         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3154         struct ctdb_public_ip_list_old *ips;
3155         struct ctdb_vnn *vnn;
3156         struct client_async_data *async_data;
3157         struct timeval timeout;
3158         TDB_DATA data;
3159         struct ctdb_client_control_state *state;
3160         bool first_add;
3161         int i, ret;
3162
3163         CTDB_NO_MEMORY(ctdb, mem_ctx);
3164
3165         /* Read IPs from local node */
3166         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3167                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3168         if (ret != 0) {
3169                 DEBUG(DEBUG_ERR,
3170                       ("Unable to fetch public IPs from local node\n"));
3171                 talloc_free(mem_ctx);
3172                 return -1;
3173         }
3174
3175         /* Read IPs file - this is safe since this is a child process */
3176         ctdb->vnn = NULL;
3177         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3178                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3179                 talloc_free(mem_ctx);
3180                 return -1;
3181         }
3182
3183         async_data = talloc_zero(mem_ctx, struct client_async_data);
3184         CTDB_NO_MEMORY(ctdb, async_data);
3185
3186         /* Compare IPs between node and file for IPs to be deleted */
3187         for (i = 0; i < ips->num; i++) {
3188                 /* */
3189                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3190                         if (ctdb_same_ip(&vnn->public_address,
3191                                          &ips->ips[i].addr)) {
3192                                 /* IP is still in file */
3193                                 break;
3194                         }
3195                 }
3196
3197                 if (vnn == NULL) {
3198                         /* Delete IP ips->ips[i] */
3199                         struct ctdb_addr_info_old *pub;
3200
3201                         DEBUG(DEBUG_NOTICE,
3202                               ("IP %s no longer configured, deleting it\n",
3203                                ctdb_addr_to_str(&ips->ips[i].addr)));
3204
3205                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3206                         CTDB_NO_MEMORY(ctdb, pub);
3207
3208                         pub->addr  = ips->ips[i].addr;
3209                         pub->mask  = 0;
3210                         pub->len   = 0;
3211
3212                         timeout = TAKEOVER_TIMEOUT();
3213
3214                         data.dsize = offsetof(struct ctdb_addr_info_old,
3215                                               iface) + pub->len;
3216                         data.dptr = (uint8_t *)pub;
3217
3218                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3219                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3220                                                   0, data, async_data,
3221                                                   &timeout, NULL);
3222                         if (state == NULL) {
3223                                 DEBUG(DEBUG_ERR,
3224                                       (__location__
3225                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3226                                 goto failed;
3227                         }
3228
3229                         ctdb_client_async_add(async_data, state);
3230                 }
3231         }
3232
3233         /* Compare IPs between node and file for IPs to be added */
3234         first_add = true;
3235         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3236                 for (i = 0; i < ips->num; i++) {
3237                         if (ctdb_same_ip(&vnn->public_address,
3238                                          &ips->ips[i].addr)) {
3239                                 /* IP already on node */
3240                                 break;
3241                         }
3242                 }
3243                 if (i == ips->num) {
3244                         /* Add IP ips->ips[i] */
3245                         struct ctdb_addr_info_old *pub;
3246                         const char *ifaces = NULL;
3247                         uint32_t len;
3248                         int iface = 0;
3249
3250                         DEBUG(DEBUG_NOTICE,
3251                               ("New IP %s configured, adding it\n",
3252                                ctdb_addr_to_str(&vnn->public_address)));
3253                         if (first_add) {
3254                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3255
3256                                 data.dsize = sizeof(pnn);
3257                                 data.dptr  = (uint8_t *)&pnn;
3258
3259                                 ret = ctdb_client_send_message(
3260                                         ctdb,
3261                                         CTDB_BROADCAST_CONNECTED,
3262                                         CTDB_SRVID_REBALANCE_NODE,
3263                                         data);
3264                                 if (ret != 0) {
3265                                         DEBUG(DEBUG_WARNING,
3266                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3267                                 }
3268
3269                                 first_add = false;
3270                         }
3271
3272                         ifaces = vnn->ifaces[0];
3273                         iface = 1;
3274                         while (vnn->ifaces[iface] != NULL) {
3275                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3276                                                          vnn->ifaces[iface]);
3277                                 iface++;
3278                         }
3279
3280                         len   = strlen(ifaces) + 1;
3281                         pub = talloc_zero_size(mem_ctx,
3282                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3283                         CTDB_NO_MEMORY(ctdb, pub);
3284
3285                         pub->addr  = vnn->public_address;
3286                         pub->mask  = vnn->public_netmask_bits;
3287                         pub->len   = len;
3288                         memcpy(&pub->iface[0], ifaces, pub->len);
3289
3290                         timeout = TAKEOVER_TIMEOUT();
3291
3292                         data.dsize = offsetof(struct ctdb_addr_info_old,
3293                                               iface) + pub->len;
3294                         data.dptr = (uint8_t *)pub;
3295
3296                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3297                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3298                                                   0, data, async_data,
3299                                                   &timeout, NULL);
3300                         if (state == NULL) {
3301                                 DEBUG(DEBUG_ERR,
3302                                       (__location__
3303                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3304                                 goto failed;
3305                         }
3306
3307                         ctdb_client_async_add(async_data, state);
3308                 }
3309         }
3310
3311         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3312                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3313                 goto failed;
3314         }
3315
3316         talloc_free(mem_ctx);
3317         return 0;
3318
3319 failed:
3320         talloc_free(mem_ctx);
3321         return -1;
3322 }
3323
3324 /* This control is sent to force the node to re-read the public addresses file
3325    and drop any addresses we should nnot longer host, and add new addresses
3326    that we are now able to host
3327 */
3328 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3329 {
3330         struct ctdb_reloadips_handle *h;
3331         pid_t parent = getpid();
3332
3333         if (ctdb->reload_ips != NULL) {
3334                 talloc_free(ctdb->reload_ips);
3335                 ctdb->reload_ips = NULL;
3336         }
3337
3338         h = talloc(ctdb, struct ctdb_reloadips_handle);
3339         CTDB_NO_MEMORY(ctdb, h);
3340         h->ctdb     = ctdb;
3341         h->c        = NULL;
3342         h->status   = -1;
3343         
3344         if (pipe(h->fd) == -1) {
3345                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3346                 talloc_free(h);
3347                 return -1;
3348         }
3349
3350         h->child = ctdb_fork(ctdb);
3351         if (h->child == (pid_t)-1) {
3352                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3353                 close(h->fd[0]);
3354                 close(h->fd[1]);
3355                 talloc_free(h);
3356                 return -1;
3357         }
3358
3359         /* child process */
3360         if (h->child == 0) {
3361                 signed char res = 0;
3362
3363                 close(h->fd[0]);
3364                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3365
3366                 prctl_set_comment("ctdb_reloadips");
3367                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3368                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3369                         res = -1;
3370                 } else {
3371                         res = ctdb_reloadips_child(ctdb);
3372                         if (res != 0) {
3373                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3374                         }
3375                 }
3376
3377                 sys_write(h->fd[1], &res, 1);
3378                 ctdb_wait_for_process_to_exit(parent);
3379                 _exit(0);
3380         }
3381
3382         h->c             = talloc_steal(h, c);
3383
3384         close(h->fd[1]);
3385         set_close_on_exec(h->fd[0]);
3386
3387         talloc_set_destructor(h, ctdb_reloadips_destructor);
3388
3389
3390         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3391                                ctdb_reloadips_child_handler, (void *)h);
3392         tevent_fd_set_auto_close(h->fde);
3393
3394         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3395                          ctdb_reloadips_timeout_event, h);
3396
3397         /* we reply later */
3398         *async_reply = true;
3399         return 0;
3400 }