ctdb-takeover: New function takeover_callback_data_init()
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Timeout value for takeover operations, built by timeval_current_ofs()
 * from the takeover_timeout tunable.  The expansion reads a variable
 * named "ctdb", so one must be in scope wherever the macro is used.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/*
 * Gratuitous ARP schedule used by ctdb_control_send_arp():
 * CTDB_ARP_INTERVAL is the first argument given to timeval_current_ofs()
 * when re-arming the timer, and the announcements stop once
 * CTDB_ARP_REPEAT rounds have been sent.
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
50
/*
 * One local network interface that public addresses can be hosted on.
 * Entries are kept on the ctdb->ifaces list.
 */
struct ctdb_interface {
        /* List linkage */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;
        /* Interface name, allocated under this entry by ctdb_add_local_iface() */
        const char *name;
        /* Only interfaces with link_up set are picked or counted as usable */
        bool link_up;
        /* Number of VNNs currently assigned to this interface */
        uint32_t references;
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/*
 * State handed to ctdb_do_takeip_callback() while a takeip event runs.
 */
struct ctdb_do_takeip_state {
        /* Control to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* VNN being taken over */
        struct ctdb_vnn *vnn;
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
427         
428                 if (status == -ETIME) {
429                         ctdb_ban_self(ctdb);
430                 }
431                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
432                                  ctdb_addr_to_str(&state->vnn->public_address),
433                                  ctdb_vnn_iface_string(state->vnn)));
434                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
435
436                 node->flags |= NODE_FLAGS_UNHEALTHY;
437                 talloc_free(state);
438                 return;
439         }
440
441         if (ctdb->do_checkpublicip) {
442
443         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
444         if (ret != 0) {
445                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         }
451
452         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
453         data.dsize = strlen((char *)data.dptr) + 1;
454         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
455
456         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
457
458
459         /* the control succeeded */
460         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
461         talloc_free(state);
462         return;
463 }
464
465 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
466 {
467         state->vnn->update_in_flight = false;
468         return 0;
469 }
470
471 /*
472   take over an ip address
473  */
474 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
475                               struct ctdb_req_control_old *c,
476                               struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_takeip_state *state;
480
481         if (vnn->update_in_flight) {
482                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
483                                     "update for this IP already in flight\n",
484                                     ctdb_addr_to_str(&vnn->public_address),
485                                     vnn->public_netmask_bits));
486                 return -1;
487         }
488
489         ret = ctdb_vnn_assign_iface(ctdb, vnn);
490         if (ret != 0) {
491                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
492                                  "assign a usable interface\n",
493                                  ctdb_addr_to_str(&vnn->public_address),
494                                  vnn->public_netmask_bits));
495                 return -1;
496         }
497
498         state = talloc(vnn, struct ctdb_do_takeip_state);
499         CTDB_NO_MEMORY(ctdb, state);
500
501         state->c = talloc_steal(ctdb, c);
502         state->vnn   = vnn;
503
504         vnn->update_in_flight = true;
505         talloc_set_destructor(state, ctdb_takeip_destructor);
506
507         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
508                             ctdb_addr_to_str(&vnn->public_address),
509                             vnn->public_netmask_bits,
510                             ctdb_vnn_iface_string(vnn)));
511
512         ret = ctdb_event_script_callback(ctdb,
513                                          state,
514                                          ctdb_do_takeip_callback,
515                                          state,
516                                          CTDB_EVENT_TAKE_IP,
517                                          "%s %s %u",
518                                          ctdb_vnn_iface_string(vnn),
519                                          ctdb_addr_to_str(&vnn->public_address),
520                                          vnn->public_netmask_bits);
521
522         if (ret != 0) {
523                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
524                         ctdb_addr_to_str(&vnn->public_address),
525                         ctdb_vnn_iface_string(vnn)));
526                 talloc_free(state);
527                 return -1;
528         }
529
530         return 0;
531 }
532
/*
 * State handed to ctdb_do_updateip_callback() while an updateip event
 * runs.
 */
struct ctdb_do_updateip_state {
        /* Control to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* Interface the address is moving away from; restored on failure */
        struct ctdb_interface *old;
        /* VNN being moved */
        struct ctdb_vnn *vnn;
};
538
539 /*
540   called when updateip event finishes
541  */
542 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
543                                       void *private_data)
544 {
545         struct ctdb_do_updateip_state *state =
546                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
547         int32_t ret;
548
549         if (status != 0) {
550                 if (status == -ETIME) {
551                         ctdb_ban_self(ctdb);
552                 }
553                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
554                         ctdb_addr_to_str(&state->vnn->public_address),
555                         state->old->name,
556                         ctdb_vnn_iface_string(state->vnn)));
557
558                 /*
559                  * All we can do is reset the old interface
560                  * and let the next run fix it
561                  */
562                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
563                 state->vnn->iface = state->old;
564                 state->vnn->iface->references++;
565
566                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
567                 talloc_free(state);
568                 return;
569         }
570
571         if (ctdb->do_checkpublicip) {
572
573         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
574         if (ret != 0) {
575                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
576                 talloc_free(state);
577                 return;
578         }
579
580         }
581
582         /* the control succeeded */
583         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
584         talloc_free(state);
585         return;
586 }
587
588 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
589 {
590         state->vnn->update_in_flight = false;
591         return 0;
592 }
593
594 /*
595   update (move) an ip address
596  */
597 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
598                                 struct ctdb_req_control_old *c,
599                                 struct ctdb_vnn *vnn)
600 {
601         int ret;
602         struct ctdb_do_updateip_state *state;
603         struct ctdb_interface *old = vnn->iface;
604         const char *new_name;
605
606         if (vnn->update_in_flight) {
607                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
608                                     "update for this IP already in flight\n",
609                                     ctdb_addr_to_str(&vnn->public_address),
610                                     vnn->public_netmask_bits));
611                 return -1;
612         }
613
614         ctdb_vnn_unassign_iface(ctdb, vnn);
615         ret = ctdb_vnn_assign_iface(ctdb, vnn);
616         if (ret != 0) {
617                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
618                                  "assin a usable interface (old iface '%s')\n",
619                                  ctdb_addr_to_str(&vnn->public_address),
620                                  vnn->public_netmask_bits,
621                                  old->name));
622                 return -1;
623         }
624
625         new_name = ctdb_vnn_iface_string(vnn);
626         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
627                 /* A benign update from one interface onto itself.
628                  * no need to run the eventscripts in this case, just return
629                  * success.
630                  */
631                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
632                 return 0;
633         }
634
635         state = talloc(vnn, struct ctdb_do_updateip_state);
636         CTDB_NO_MEMORY(ctdb, state);
637
638         state->c = talloc_steal(ctdb, c);
639         state->old = old;
640         state->vnn = vnn;
641
642         vnn->update_in_flight = true;
643         talloc_set_destructor(state, ctdb_updateip_destructor);
644
645         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
646                             "interface %s to %s\n",
647                             ctdb_addr_to_str(&vnn->public_address),
648                             vnn->public_netmask_bits,
649                             old->name,
650                             new_name));
651
652         ret = ctdb_event_script_callback(ctdb,
653                                          state,
654                                          ctdb_do_updateip_callback,
655                                          state,
656                                          CTDB_EVENT_UPDATE_IP,
657                                          "%s %s %s %u",
658                                          state->old->name,
659                                          new_name,
660                                          ctdb_addr_to_str(&vnn->public_address),
661                                          vnn->public_netmask_bits);
662         if (ret != 0) {
663                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
664                                  ctdb_addr_to_str(&vnn->public_address),
665                                  old->name, new_name));
666                 talloc_free(state);
667                 return -1;
668         }
669
670         return 0;
671 }
672
673 /*
674   Find the vnn of the node that has a public ip address
675   returns -1 if the address is not known as a public address
676  */
677 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
678 {
679         struct ctdb_vnn *vnn;
680
681         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
682                 if (ctdb_same_ip(&vnn->public_address, addr)) {
683                         return vnn;
684                 }
685         }
686
687         return NULL;
688 }
689
690 /*
691   take over an ip address
692  */
693 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
694                                  struct ctdb_req_control_old *c,
695                                  TDB_DATA indata,
696                                  bool *async_reply)
697 {
698         int ret;
699         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
700         struct ctdb_vnn *vnn;
701         bool have_ip = false;
702         bool do_updateip = false;
703         bool do_takeip = false;
704         struct ctdb_interface *best_iface = NULL;
705
706         if (pip->pnn != ctdb->pnn) {
707                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
708                                  "with pnn %d, but we're node %d\n",
709                                  ctdb_addr_to_str(&pip->addr),
710                                  pip->pnn, ctdb->pnn));
711                 return -1;
712         }
713
714         /* update out vnn list */
715         vnn = find_public_ip_vnn(ctdb, &pip->addr);
716         if (vnn == NULL) {
717                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
718                         ctdb_addr_to_str(&pip->addr)));
719                 return 0;
720         }
721
722         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
723                 have_ip = ctdb_sys_have_ip(&pip->addr);
724         }
725         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
726         if (best_iface == NULL) {
727                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
728                                  "a usable interface (old %s, have_ip %d)\n",
729                                  ctdb_addr_to_str(&vnn->public_address),
730                                  vnn->public_netmask_bits,
731                                  ctdb_vnn_iface_string(vnn),
732                                  have_ip));
733                 return -1;
734         }
735
736         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
737                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
738                 have_ip = false;
739         }
740
741
742         if (vnn->iface == NULL && have_ip) {
743                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
744                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
745                                  ctdb_addr_to_str(&vnn->public_address)));
746                 return 0;
747         }
748
749         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
750                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
751                                   "and we have it on iface[%s], but it was assigned to node %d"
752                                   "and we are node %d, banning ourself\n",
753                                  ctdb_addr_to_str(&vnn->public_address),
754                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
755                 ctdb_ban_self(ctdb);
756                 return -1;
757         }
758
759         if (vnn->pnn == -1 && have_ip) {
760                 vnn->pnn = ctdb->pnn;
761                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
762                                   "and we already have it on iface[%s], update local daemon\n",
763                                  ctdb_addr_to_str(&vnn->public_address),
764                                   ctdb_vnn_iface_string(vnn)));
765                 return 0;
766         }
767
768         if (vnn->iface) {
769                 if (vnn->iface != best_iface) {
770                         if (!vnn->iface->link_up) {
771                                 do_updateip = true;
772                         } else if (vnn->iface->references > (best_iface->references + 1)) {
773                                 /* only move when the rebalance gains something */
774                                         do_updateip = true;
775                         }
776                 }
777         }
778
779         if (!have_ip) {
780                 if (do_updateip) {
781                         ctdb_vnn_unassign_iface(ctdb, vnn);
782                         do_updateip = false;
783                 }
784                 do_takeip = true;
785         }
786
787         if (do_takeip) {
788                 ret = ctdb_do_takeip(ctdb, c, vnn);
789                 if (ret != 0) {
790                         return -1;
791                 }
792         } else if (do_updateip) {
793                 ret = ctdb_do_updateip(ctdb, c, vnn);
794                 if (ret != 0) {
795                         return -1;
796                 }
797         } else {
798                 /*
799                  * The interface is up and the kernel known the ip
800                  * => do nothing
801                  */
802                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
803                         ctdb_addr_to_str(&pip->addr),
804                         vnn->public_netmask_bits,
805                         ctdb_vnn_iface_string(vnn)));
806                 return 0;
807         }
808
809         /* tell ctdb_control.c that we will be replying asynchronously */
810         *async_reply = true;
811
812         return 0;
813 }
814
815 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
816 {
817         DLIST_REMOVE(ctdb->vnn, vnn);
818         ctdb_vnn_unassign_iface(ctdb, vnn);
819         ctdb_remove_orphaned_ifaces(ctdb, vnn);
820         talloc_free(vnn);
821 }
822
823 /*
824   called when releaseip event finishes
825  */
826 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
827                                 void *private_data)
828 {
829         struct takeover_callback_state *state = 
830                 talloc_get_type(private_data, struct takeover_callback_state);
831         TDB_DATA data;
832
833         if (status == -ETIME) {
834                 ctdb_ban_self(ctdb);
835         }
836
837         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
838                 if  (ctdb_sys_have_ip(state->addr)) {
839                         DEBUG(DEBUG_ERR,
840                               ("IP %s still hosted during release IP callback, failing\n",
841                                ctdb_addr_to_str(state->addr)));
842                         ctdb_request_control_reply(ctdb, state->c,
843                                                    NULL, -1, NULL);
844                         talloc_free(state);
845                         return;
846                 }
847         }
848
849         /* send a message to all clients of this node telling them
850            that the cluster has been reconfigured and they should
851            release any sockets on this IP */
852         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
853         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
854         data.dsize = strlen((char *)data.dptr)+1;
855
856         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
857
858         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
859
860         ctdb_vnn_unassign_iface(ctdb, state->vnn);
861
862         /* Process the IP if it has been marked for deletion */
863         if (state->vnn->delete_pending) {
864                 do_delete_ip(ctdb, state->vnn);
865                 state->vnn = NULL;
866         }
867
868         /* the control succeeded */
869         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
870         talloc_free(state);
871 }
872
873 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
874 {
875         if (state->vnn != NULL) {
876                 state->vnn->update_in_flight = false;
877         }
878         return 0;
879 }
880
881 /*
882   release an ip address
883  */
884 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
885                                 struct ctdb_req_control_old *c,
886                                 TDB_DATA indata, 
887                                 bool *async_reply)
888 {
889         int ret;
890         struct takeover_callback_state *state;
891         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
892         struct ctdb_vnn *vnn;
893         char *iface;
894
895         /* update our vnn list */
896         vnn = find_public_ip_vnn(ctdb, &pip->addr);
897         if (vnn == NULL) {
898                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
899                         ctdb_addr_to_str(&pip->addr)));
900                 return 0;
901         }
902         vnn->pnn = pip->pnn;
903
904         /* stop any previous arps */
905         talloc_free(vnn->takeover_ctx);
906         vnn->takeover_ctx = NULL;
907
908         /* Some ctdb tool commands (e.g. moveip) send
909          * lazy multicast to drop an IP from any node that isn't the
910          * intended new node.  The following causes makes ctdbd ignore
911          * a release for any address it doesn't host.
912          */
913         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
914                 if (!ctdb_sys_have_ip(&pip->addr)) {
915                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
916                                 ctdb_addr_to_str(&pip->addr),
917                                 vnn->public_netmask_bits,
918                                 ctdb_vnn_iface_string(vnn)));
919                         ctdb_vnn_unassign_iface(ctdb, vnn);
920                         return 0;
921                 }
922         } else {
923                 if (vnn->iface == NULL) {
924                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
925                                            ctdb_addr_to_str(&pip->addr),
926                                            vnn->public_netmask_bits));
927                         return 0;
928                 }
929         }
930
931         /* There is a potential race between take_ip and us because we
932          * update the VNN via a callback that run when the
933          * eventscripts have been run.  Avoid the race by allowing one
934          * update to be in flight at a time.
935          */
936         if (vnn->update_in_flight) {
937                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
938                                     "update for this IP already in flight\n",
939                                     ctdb_addr_to_str(&vnn->public_address),
940                                     vnn->public_netmask_bits));
941                 return -1;
942         }
943
944         iface = strdup(ctdb_vnn_iface_string(vnn));
945
946         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
947                 ctdb_addr_to_str(&pip->addr),
948                 vnn->public_netmask_bits,
949                 iface,
950                 pip->pnn));
951
952         state = talloc(ctdb, struct takeover_callback_state);
953         if (state == NULL) {
954                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
955                                __FILE__, __LINE__);
956                 free(iface);
957                 return -1;
958         }
959
960         state->c = talloc_steal(state, c);
961         state->addr = talloc(state, ctdb_sock_addr);       
962         if (state->addr == NULL) {
963                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
964                                __FILE__, __LINE__);
965                 free(iface);
966                 talloc_free(state);
967                 return -1;
968         }
969         *state->addr = pip->addr;
970         state->vnn   = vnn;
971
972         vnn->update_in_flight = true;
973         talloc_set_destructor(state, ctdb_releaseip_destructor);
974
975         ret = ctdb_event_script_callback(ctdb, 
976                                          state, release_ip_callback, state,
977                                          CTDB_EVENT_RELEASE_IP,
978                                          "%s %s %u",
979                                          iface,
980                                          ctdb_addr_to_str(&pip->addr),
981                                          vnn->public_netmask_bits);
982         free(iface);
983         if (ret != 0) {
984                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
985                         ctdb_addr_to_str(&pip->addr),
986                         ctdb_vnn_iface_string(vnn)));
987                 talloc_free(state);
988                 return -1;
989         }
990
991         /* tell the control that we will be reply asynchronously */
992         *async_reply = true;
993         return 0;
994 }
995
996 static int ctdb_add_public_address(struct ctdb_context *ctdb,
997                                    ctdb_sock_addr *addr,
998                                    unsigned mask, const char *ifaces,
999                                    bool check_address)
1000 {
1001         struct ctdb_vnn      *vnn;
1002         uint32_t num = 0;
1003         char *tmp;
1004         const char *iface;
1005         int i;
1006         int ret;
1007
1008         tmp = strdup(ifaces);
1009         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1010                 if (!ctdb_sys_check_iface_exists(iface)) {
1011                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1012                         free(tmp);
1013                         return -1;
1014                 }
1015         }
1016         free(tmp);
1017
1018         /* Verify that we don't have an entry for this ip yet */
1019         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1020                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1021                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1022                                 ctdb_addr_to_str(addr)));
1023                         return -1;
1024                 }               
1025         }
1026
1027         /* create a new vnn structure for this ip address */
1028         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1029         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1030         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1031         tmp = talloc_strdup(vnn, ifaces);
1032         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1033         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1034                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1035                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1036                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1037                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1038                 num++;
1039         }
1040         talloc_free(tmp);
1041         vnn->ifaces[num] = NULL;
1042         vnn->public_address      = *addr;
1043         vnn->public_netmask_bits = mask;
1044         vnn->pnn                 = -1;
1045         if (check_address) {
1046                 if (ctdb_sys_have_ip(addr)) {
1047                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1048                         vnn->pnn = ctdb->pnn;
1049                 }
1050         }
1051
1052         for (i=0; vnn->ifaces[i]; i++) {
1053                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1054                 if (ret != 0) {
1055                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1056                                            "for public_address[%s]\n",
1057                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1058                         talloc_free(vnn);
1059                         return -1;
1060                 }
1061         }
1062
1063         DLIST_ADD(ctdb->vnn, vnn);
1064
1065         return 0;
1066 }
1067
1068 /*
1069   setup the public address lists from a file
1070 */
1071 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1072 {
1073         char **lines;
1074         int nlines;
1075         int i;
1076
1077         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1078         if (lines == NULL) {
1079                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1080                 return -1;
1081         }
1082         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1083                 nlines--;
1084         }
1085
1086         for (i=0;i<nlines;i++) {
1087                 unsigned mask;
1088                 ctdb_sock_addr addr;
1089                 const char *addrstr;
1090                 const char *ifaces;
1091                 char *tok, *line;
1092
1093                 line = lines[i];
1094                 while ((*line == ' ') || (*line == '\t')) {
1095                         line++;
1096                 }
1097                 if (*line == '#') {
1098                         continue;
1099                 }
1100                 if (strcmp(line, "") == 0) {
1101                         continue;
1102                 }
1103                 tok = strtok(line, " \t");
1104                 addrstr = tok;
1105                 tok = strtok(NULL, " \t");
1106                 if (tok == NULL) {
1107                         if (NULL == ctdb->default_public_interface) {
1108                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1109                                          i+1));
1110                                 talloc_free(lines);
1111                                 return -1;
1112                         }
1113                         ifaces = ctdb->default_public_interface;
1114                 } else {
1115                         ifaces = tok;
1116                 }
1117
1118                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1119                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1120                         talloc_free(lines);
1121                         return -1;
1122                 }
1123                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1124                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1125                         talloc_free(lines);
1126                         return -1;
1127                 }
1128         }
1129
1130
1131         talloc_free(lines);
1132         return 0;
1133 }
1134
1135 static void *add_ip_callback(void *parm, void *data)
1136 {
1137         struct public_ip_list *this_ip = parm;
1138         struct public_ip_list *prev_ip = data;
1139
1140         if (prev_ip == NULL) {
1141                 return parm;
1142         }
1143         if (this_ip->pnn == -1) {
1144                 this_ip->pnn = prev_ip->pnn;
1145         }
1146
1147         return parm;
1148 }
1149
1150 static int getips_count_callback(void *param, void *data)
1151 {
1152         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1153         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1154
1155         new_ip->next = *ip_list;
1156         *ip_list     = new_ip;
1157         return 0;
1158 }
1159
1160 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1161                                        struct ctdb_public_ip_list *ips,
1162                                        uint32_t pnn);
1163
1164 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1165                                          struct ipalloc_state *ipalloc_state,
1166                                          struct ctdb_node_map_old *nodemap)
1167 {
1168         int j;
1169         int ret;
1170         struct ctdb_public_ip_list_old *ip_list;
1171
1172         if (ipalloc_state->num != nodemap->num) {
1173                 DEBUG(DEBUG_ERR,
1174                       (__location__
1175                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1176                        ipalloc_state->num, nodemap->num));
1177                 return -1;
1178         }
1179
1180         for (j=0; j<nodemap->num; j++) {
1181                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1182                         continue;
1183                 }
1184
1185                 /* Retrieve the list of known public IPs from the node */
1186                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1187                                         TAKEOVER_TIMEOUT(),
1188                                         j,
1189                                         ipalloc_state->known_public_ips,
1190                                         0,
1191                                         &ip_list);
1192                 if (ret != 0) {
1193                         DEBUG(DEBUG_ERR,
1194                               ("Failed to read known public IPs from node: %u\n",
1195                                j));
1196                         return -1;
1197                 }
1198                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1199                 /* This could be copied and freed.  However, ip_list
1200                  * is allocated off ipalloc_state->known_public_ips,
1201                  * so this is a safe hack.  This will go away in a
1202                  * while anyway... */
1203                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1204
1205                 if (ctdb->do_checkpublicip) {
1206                         verify_remote_ip_allocation(
1207                                 ctdb,
1208                                 &ipalloc_state->known_public_ips[j],
1209                                 j);
1210                 }
1211
1212                 /* Retrieve the list of available public IPs from the node */
1213                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1214                                         TAKEOVER_TIMEOUT(),
1215                                         j,
1216                                         ipalloc_state->available_public_ips,
1217                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1218                                         &ip_list);
1219                 if (ret != 0) {
1220                         DEBUG(DEBUG_ERR,
1221                               ("Failed to read available public IPs from node: %u\n",
1222                                j));
1223                         return -1;
1224                 }
1225                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1226                 /* This could be copied and freed.  However, ip_list
1227                  * is allocated off ipalloc_state->available_public_ips,
1228                  * so this is a safe hack.  This will go away in a
1229                  * while anyway... */
1230                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1231         }
1232
1233         return 0;
1234 }
1235
1236 static struct public_ip_list *
1237 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1238 {
1239         int i, j;
1240         struct public_ip_list *ip_list;
1241         struct ctdb_public_ip_list *public_ips;
1242
1243         TALLOC_FREE(ctdb->ip_tree);
1244         ctdb->ip_tree = trbt_create(ctdb, 0);
1245
1246         for (i=0; i < ctdb->num_nodes; i++) {
1247
1248                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1249                         continue;
1250                 }
1251
1252                 /* there were no public ips for this node */
1253                 if (ipalloc_state->known_public_ips == NULL) {
1254                         continue;
1255                 }
1256
1257                 public_ips = &ipalloc_state->known_public_ips[i];
1258
1259                 for (j=0; j < public_ips->num; j++) {
1260                         struct public_ip_list *tmp_ip;
1261
1262                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1263                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1264                         /* Do not use information about IP addresses hosted
1265                          * on other nodes, it may not be accurate */
1266                         if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
1267                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1268                         } else {
1269                                 tmp_ip->pnn = -1;
1270                         }
1271                         tmp_ip->addr = public_ips->ip[j].addr;
1272                         tmp_ip->next = NULL;
1273
1274                         trbt_insertarray32_callback(ctdb->ip_tree,
1275                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1276                                 add_ip_callback,
1277                                 tmp_ip);
1278                 }
1279         }
1280
1281         ip_list = NULL;
1282         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1283
1284         return ip_list;
1285 }
1286
1287 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1288 {
1289         int i;
1290
1291         for (i=0;i<nodemap->num;i++) {
1292                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1293                         /* Found one completely healthy node */
1294                         return false;
1295                 }
1296         }
1297
1298         return true;
1299 }
1300
/*
 * Context shared by get_tunable_callback() and
 * get_tunable_fail_callback() while one tunable is collected from a
 * set of nodes.
 */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched; used in log messages */
        const char *tunable;
        /* talloc array of per-node values, indexed by PNN */
        uint32_t *out;
        /* Set by the callbacks when the whole fetch must be abandoned */
        bool fatal;
};
1306
1307 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1308                                  int32_t res, TDB_DATA outdata,
1309                                  void *callback)
1310 {
1311         struct get_tunable_callback_data *cd =
1312                 (struct get_tunable_callback_data *)callback;
1313         int size;
1314
1315         if (res != 0) {
1316                 /* Already handled in fail callback */
1317                 return;
1318         }
1319
1320         if (outdata.dsize != sizeof(uint32_t)) {
1321                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1322                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1323                                  (int)outdata.dsize));
1324                 cd->fatal = true;
1325                 return;
1326         }
1327
1328         size = talloc_array_length(cd->out);
1329         if (pnn >= size) {
1330                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1331                                  cd->tunable, pnn, size));
1332                 return;
1333         }
1334
1335                 
1336         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1337 }
1338
1339 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1340                                        int32_t res, TDB_DATA outdata,
1341                                        void *callback)
1342 {
1343         struct get_tunable_callback_data *cd =
1344                 (struct get_tunable_callback_data *)callback;
1345
1346         switch (res) {
1347         case -ETIME:
1348                 DEBUG(DEBUG_ERR,
1349                       ("Timed out getting tunable \"%s\" from node %d\n",
1350                        cd->tunable, pnn));
1351                 cd->fatal = true;
1352                 break;
1353         case -EINVAL:
1354         case -1:
1355                 DEBUG(DEBUG_WARNING,
1356                       ("Tunable \"%s\" not implemented on node %d\n",
1357                        cd->tunable, pnn));
1358                 break;
1359         default:
1360                 DEBUG(DEBUG_ERR,
1361                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1362                        cd->tunable, pnn));
1363                 cd->fatal = true;
1364         }
1365 }
1366
1367 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1368                                         TALLOC_CTX *tmp_ctx,
1369                                         struct ctdb_node_map_old *nodemap,
1370                                         const char *tunable,
1371                                         uint32_t default_value)
1372 {
1373         TDB_DATA data;
1374         struct ctdb_control_get_tunable *t;
1375         uint32_t *nodes;
1376         uint32_t *tvals;
1377         struct get_tunable_callback_data callback_data;
1378         int i;
1379
1380         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1381         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1382         for (i=0; i<nodemap->num; i++) {
1383                 tvals[i] = default_value;
1384         }
1385                 
1386         callback_data.out = tvals;
1387         callback_data.tunable = tunable;
1388         callback_data.fatal = false;
1389
1390         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1391         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1392         t = (struct ctdb_control_get_tunable *)data.dptr;
1393         t->length = strlen(tunable)+1;
1394         memcpy(t->name, tunable, t->length);
1395         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1396         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1397                                       nodes, 0, TAKEOVER_TIMEOUT(),
1398                                       false, data,
1399                                       get_tunable_callback,
1400                                       get_tunable_fail_callback,
1401                                       &callback_data) != 0) {
1402                 if (callback_data.fatal) {
1403                         talloc_free(tvals);
1404                         tvals = NULL;
1405                 }
1406         }
1407         talloc_free(nodes);
1408         talloc_free(data.dptr);
1409
1410         return tvals;
1411 }
1412
1413 /* Set internal flags for IP allocation:
1414  *   Clear ip flags
1415  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1416  *   Set NOIPHOST ip flag for each INACTIVE node
1417  *   if all nodes are disabled:
1418  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1419  *   else
1420  *     Set NOIPHOST ip flags for disabled nodes
1421  */
1422 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1423                                  struct ctdb_node_map_old *nodemap,
1424                                  uint32_t *tval_noiptakeover,
1425                                  uint32_t *tval_noiphostonalldisabled)
1426 {
1427         int i;
1428
1429         for (i=0;i<nodemap->num;i++) {
1430                 /* Can not take IPs on node with NoIPTakeover set */
1431                 if (tval_noiptakeover[i] != 0) {
1432                         ipalloc_state->noiptakeover[i] = true;
1433                 }
1434
1435                 /* Can not host IPs on INACTIVE node */
1436                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1437                         ipalloc_state->noiphost[i] = true;
1438                 }
1439         }
1440
1441         if (all_nodes_are_disabled(nodemap)) {
1442                 /* If all nodes are disabled, can not host IPs on node
1443                  * with NoIPHostOnAllDisabled set
1444                  */
1445                 for (i=0;i<nodemap->num;i++) {
1446                         if (tval_noiphostonalldisabled[i] != 0) {
1447                                 ipalloc_state->noiphost[i] = true;
1448                         }
1449                 }
1450         } else {
1451                 /* If some nodes are not disabled, then can not host
1452                  * IPs on DISABLED node
1453                  */
1454                 for (i=0;i<nodemap->num;i++) {
1455                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1456                                 ipalloc_state->noiphost[i] = true;
1457                         }
1458                 }
1459         }
1460 }
1461
1462 static bool set_ipflags(struct ctdb_context *ctdb,
1463                         struct ipalloc_state *ipalloc_state,
1464                         struct ctdb_node_map_old *nodemap)
1465 {
1466         uint32_t *tval_noiptakeover;
1467         uint32_t *tval_noiphostonalldisabled;
1468
1469         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1470                                                    "NoIPTakeover", 0);
1471         if (tval_noiptakeover == NULL) {
1472                 return false;
1473         }
1474
1475         tval_noiphostonalldisabled =
1476                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1477                                        "NoIPHostOnAllDisabled", 0);
1478         if (tval_noiphostonalldisabled == NULL) {
1479                 /* Caller frees tmp_ctx */
1480                 return false;
1481         }
1482
1483         set_ipflags_internal(ipalloc_state, nodemap,
1484                              tval_noiptakeover,
1485                              tval_noiphostonalldisabled);
1486
1487         talloc_free(tval_noiptakeover);
1488         talloc_free(tval_noiphostonalldisabled);
1489
1490         return true;
1491 }
1492
1493 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1494                                                  TALLOC_CTX *mem_ctx)
1495 {
1496         struct ipalloc_state *ipalloc_state =
1497                 talloc_zero(mem_ctx, struct ipalloc_state);
1498         if (ipalloc_state == NULL) {
1499                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1500                 return NULL;
1501         }
1502
1503         ipalloc_state->num = ctdb->num_nodes;
1504
1505         ipalloc_state->known_public_ips =
1506                 talloc_zero_array(ipalloc_state,
1507                                   struct ctdb_public_ip_list,
1508                                   ipalloc_state->num);
1509         if (ipalloc_state->known_public_ips == NULL) {
1510                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1511                 goto fail;
1512         }
1513
1514         ipalloc_state->available_public_ips =
1515                 talloc_zero_array(ipalloc_state,
1516                                   struct ctdb_public_ip_list,
1517                                   ipalloc_state->num);
1518         if (ipalloc_state->available_public_ips == NULL) {
1519                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1520                 goto fail;
1521         }
1522         ipalloc_state->noiptakeover =
1523                 talloc_zero_array(ipalloc_state,
1524                                   bool,
1525                                   ipalloc_state->num);
1526         if (ipalloc_state->noiptakeover == NULL) {
1527                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1528                 goto fail;
1529         }
1530         ipalloc_state->noiphost =
1531                 talloc_zero_array(ipalloc_state,
1532                                   bool,
1533                                   ipalloc_state->num);
1534         if (ipalloc_state->noiphost == NULL) {
1535                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1536                 goto fail;
1537         }
1538
1539         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1540                 ipalloc_state->algorithm = IPALLOC_LCP2;
1541         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1542                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1543         } else {
1544                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1545         }
1546
1547         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1548
1549         return ipalloc_state;
1550 fail:
1551         talloc_free(ipalloc_state);
1552         return NULL;
1553 }
1554
1555 struct takeover_callback_data {
1556         uint32_t num_nodes;
1557         bool *node_failed;
1558         client_async_callback fail_callback;
1559         void *fail_callback_data;
1560 };
1561
1562 static struct takeover_callback_data *
1563 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1564                             uint32_t num_nodes,
1565                             client_async_callback fail_callback,
1566                             void *callback_data)
1567 {
1568         static struct takeover_callback_data *takeover_data;
1569
1570         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1571         if (takeover_data == NULL) {
1572                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1573                 return NULL;
1574         }
1575
1576         takeover_data->node_failed = talloc_zero_array(takeover_data,
1577                                                        bool, num_nodes);
1578         if (takeover_data->node_failed == NULL) {
1579                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1580                 talloc_free(takeover_data);
1581                 return NULL;
1582         }
1583
1584         takeover_data->num_nodes = num_nodes;
1585         takeover_data->fail_callback = fail_callback;
1586         takeover_data->fail_callback_data = callback_data;
1587
1588         return takeover_data;
1589 }
1590
1591 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1592                                        uint32_t node_pnn, int32_t res,
1593                                        TDB_DATA outdata, void *callback_data)
1594 {
1595         struct takeover_callback_data *cd =
1596                 talloc_get_type_abort(callback_data,
1597                                       struct takeover_callback_data);
1598
1599         if (node_pnn >= cd->num_nodes) {
1600                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1601                 return;
1602         }
1603
1604         if (!cd->node_failed[node_pnn]) {
1605                 cd->node_failed[node_pnn] = true;
1606                 cd->fail_callback(ctdb, node_pnn, res, outdata,
1607                                   cd->fail_callback_data);
1608         }
1609 }
1610
1611 /*
1612  * Recalculate the allocation of public IPs to nodes and have the
1613  * nodes host their allocated addresses.
1614  *
1615  * - Allocate memory for IP allocation state, including per node
1616  *   arrays
1617  * - Populate IP allocation algorithm in IP allocation state
1618  * - Populate local value of tunable NoIPFailback in IP allocation
1619      state - this is really a cluster-wide configuration variable and
1620      only the value form the master node is used
1621  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1622  *   connected nodes - this is done separately so tunable values can
1623  *   be faked in unit testing
1624  * - Populate NoIPTakover tunable in IP allocation state
1625  * - Populate NoIPHost in IP allocation state, derived from node flags
1626  *   and NoIPHostOnAllDisabled tunable
1627  * - Retrieve and populate known and available IP lists in IP
1628  *   allocation state
1629  * - If no available IP addresses then early exit
1630  * - Build list of (known IPs, currently assigned node)
1631  * - Populate list of nodes to force rebalance - internal structure,
1632  *   currently no way to fetch, only used by LCP2 for nodes that have
1633  *   had new IP addresses added
1634  * - Run IP allocation algorithm
1635  * - Send RELEASE_IP to all nodes for IPs they should not host
1636  * - Send TAKE_IP to all nodes for IPs they should host
1637  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1638  */
1639 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1640                       uint32_t *force_rebalance_nodes,
1641                       client_async_callback fail_callback, void *callback_data)
1642 {
1643         int i, ret;
1644         struct ctdb_public_ip ip;
1645         uint32_t *nodes;
1646         struct public_ip_list *all_ips, *tmp_ip;
1647         TDB_DATA data;
1648         struct timeval timeout;
1649         struct client_async_data *async_data;
1650         struct ctdb_client_control_state *state;
1651         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1652         struct ipalloc_state *ipalloc_state;
1653         struct takeover_callback_data *takeover_data;
1654         bool can_host_ips;
1655
1656         /*
1657          * ip failover is completely disabled, just send out the 
1658          * ipreallocated event.
1659          */
1660         if (ctdb->tunable.disable_ip_failover != 0) {
1661                 goto ipreallocated;
1662         }
1663
1664         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1665         if (ipalloc_state == NULL) {
1666                 talloc_free(tmp_ctx);
1667                 return -1;
1668         }
1669
1670         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1671                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1672                 talloc_free(tmp_ctx);
1673                 return -1;
1674         }
1675
1676         /* Fetch known/available public IPs from each active node */
1677         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1678         if (ret != 0) {
1679                 talloc_free(tmp_ctx);
1680                 return -1;
1681         }
1682
1683         /* Short-circuit IP allocation if no node has available IPs */
1684         can_host_ips = false;
1685         for (i=0; i < ipalloc_state->num; i++) {
1686                 if (ipalloc_state->available_public_ips[i].num != 0) {
1687                         can_host_ips = true;
1688                 }
1689         }
1690         if (!can_host_ips) {
1691                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1692                 goto ipreallocated;
1693         }
1694
1695         /* since nodes only know about those public addresses that
1696            can be served by that particular node, no single node has
1697            a full list of all public addresses that exist in the cluster.
1698            Walk over all node structures and create a merged list of
1699            all public addresses that exist in the cluster.
1700
1701            keep the tree of ips around as ctdb->ip_tree
1702         */
1703         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1704         ipalloc_state->all_ips = all_ips;
1705
1706         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1707
1708         /* Do the IP reassignment calculations */
1709         ipalloc(ipalloc_state);
1710
1711         /* Now tell all nodes to release any public IPs should not
1712          * host.  This will be a NOOP on nodes that don't currently
1713          * hold the given IP.
1714          */
1715         takeover_data = takeover_callback_data_init(tmp_ctx,
1716                                                     nodemap->num,
1717                                                     fail_callback,
1718                                                     callback_data);
1719         if (takeover_data == NULL) {
1720                 talloc_free(tmp_ctx);
1721                 return -1;
1722         }
1723
1724         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1725         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1726
1727         async_data->fail_callback = takeover_run_fail_callback;
1728         async_data->callback_data = takeover_data;
1729
1730         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1731
1732         /* Send a RELEASE_IP to all nodes that should not be hosting
1733          * each IP.  For each IP, all but one of these will be
1734          * redundant.  However, the redundant ones are used to tell
1735          * nodes which node should be hosting the IP so that commands
1736          * like "ctdb ip" can display a particular nodes idea of who
1737          * is hosting what. */
1738         for (i=0;i<nodemap->num;i++) {
1739                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1740                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1741                         continue;
1742                 }
1743
1744                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1745                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1746                                 /* This node should be serving this
1747                                    vnn so don't tell it to release the ip
1748                                 */
1749                                 continue;
1750                         }
1751                         ip.pnn  = tmp_ip->pnn;
1752                         ip.addr = tmp_ip->addr;
1753
1754                         timeout = TAKEOVER_TIMEOUT();
1755                         data.dsize = sizeof(ip);
1756                         data.dptr  = (uint8_t *)&ip;
1757                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1758                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1759                                                   data, async_data,
1760                                                   &timeout, NULL);
1761                         if (state == NULL) {
1762                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1763                                 talloc_free(tmp_ctx);
1764                                 return -1;
1765                         }
1766
1767                         ctdb_client_async_add(async_data, state);
1768                 }
1769         }
1770         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1771                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1772                 talloc_free(tmp_ctx);
1773                 return -1;
1774         }
1775         talloc_free(async_data);
1776
1777
1778         /* For each IP, send a TAKOVER_IP to the node that should be
1779          * hosting it.  Many of these will often be redundant (since
1780          * the allocation won't have changed) but they can be useful
1781          * to recover from inconsistencies. */
1782         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1783         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1784
1785         async_data->fail_callback = fail_callback;
1786         async_data->callback_data = callback_data;
1787
1788         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1789                 if (tmp_ip->pnn == -1) {
1790                         /* this IP won't be taken over */
1791                         continue;
1792                 }
1793
1794                 ip.pnn  = tmp_ip->pnn;
1795                 ip.addr = tmp_ip->addr;
1796
1797                 timeout = TAKEOVER_TIMEOUT();
1798                 data.dsize = sizeof(ip);
1799                 data.dptr  = (uint8_t *)&ip;
1800                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1801                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1802                                           data, async_data, &timeout, NULL);
1803                 if (state == NULL) {
1804                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1805                         talloc_free(tmp_ctx);
1806                         return -1;
1807                 }
1808
1809                 ctdb_client_async_add(async_data, state);
1810         }
1811         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1812                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1813                 talloc_free(tmp_ctx);
1814                 return -1;
1815         }
1816
1817 ipreallocated:
1818         /*
1819          * Tell all nodes to run eventscripts to process the
1820          * "ipreallocated" event.  This can do a lot of things,
1821          * including restarting services to reconfigure them if public
1822          * IPs have moved.  Once upon a time this event only used to
1823          * update natgw.
1824          */
1825         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1826         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1827                                         nodes, 0, TAKEOVER_TIMEOUT(),
1828                                         false, tdb_null,
1829                                         NULL, fail_callback,
1830                                         &callback_data);
1831         if (ret != 0) {
1832                 DEBUG(DEBUG_ERR,
1833                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1834         }
1835
1836         talloc_free(tmp_ctx);
1837         return ret;
1838 }
1839
1840
1841 /*
1842   destroy a ctdb_client_ip structure
1843  */
1844 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1845 {
1846         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1847                 ctdb_addr_to_str(&ip->addr),
1848                 ntohs(ip->addr.ip.sin_port),
1849                 ip->client_id));
1850
1851         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1852         return 0;
1853 }
1854
1855 /*
1856   called by a client to inform us of a TCP connection that it is managing
1857   that should tickled with an ACK when IP takeover is done
1858  */
1859 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1860                                 TDB_DATA indata)
1861 {
1862         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1863         struct ctdb_connection *tcp_sock = NULL;
1864         struct ctdb_tcp_list *tcp;
1865         struct ctdb_connection t;
1866         int ret;
1867         TDB_DATA data;
1868         struct ctdb_client_ip *ip;
1869         struct ctdb_vnn *vnn;
1870         ctdb_sock_addr addr;
1871
1872         /* If we don't have public IPs, tickles are useless */
1873         if (ctdb->vnn == NULL) {
1874                 return 0;
1875         }
1876
1877         tcp_sock = (struct ctdb_connection *)indata.dptr;
1878
1879         addr = tcp_sock->src;
1880         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1881         addr = tcp_sock->dst;
1882         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1883
1884         ZERO_STRUCT(addr);
1885         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1886         vnn = find_public_ip_vnn(ctdb, &addr);
1887         if (vnn == NULL) {
1888                 switch (addr.sa.sa_family) {
1889                 case AF_INET:
1890                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1891                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1892                                         ctdb_addr_to_str(&addr)));
1893                         }
1894                         break;
1895                 case AF_INET6:
1896                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1897                                 ctdb_addr_to_str(&addr)));
1898                         break;
1899                 default:
1900                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1901                 }
1902
1903                 return 0;
1904         }
1905
1906         if (vnn->pnn != ctdb->pnn) {
1907                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1908                         ctdb_addr_to_str(&addr),
1909                         client_id, client->pid));
1910                 /* failing this call will tell smbd to die */
1911                 return -1;
1912         }
1913
1914         ip = talloc(client, struct ctdb_client_ip);
1915         CTDB_NO_MEMORY(ctdb, ip);
1916
1917         ip->ctdb      = ctdb;
1918         ip->addr      = addr;
1919         ip->client_id = client_id;
1920         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1921         DLIST_ADD(ctdb->client_ip_list, ip);
1922
1923         tcp = talloc(client, struct ctdb_tcp_list);
1924         CTDB_NO_MEMORY(ctdb, tcp);
1925
1926         tcp->connection.src = tcp_sock->src;
1927         tcp->connection.dst = tcp_sock->dst;
1928
1929         DLIST_ADD(client->tcp_list, tcp);
1930
1931         t.src = tcp_sock->src;
1932         t.dst = tcp_sock->dst;
1933
1934         data.dptr = (uint8_t *)&t;
1935         data.dsize = sizeof(t);
1936
1937         switch (addr.sa.sa_family) {
1938         case AF_INET:
1939                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1940                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1941                         ctdb_addr_to_str(&tcp_sock->src),
1942                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1943                 break;
1944         case AF_INET6:
1945                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1946                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1947                         ctdb_addr_to_str(&tcp_sock->src),
1948                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1949                 break;
1950         default:
1951                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1952         }
1953
1954
1955         /* tell all nodes about this tcp connection */
1956         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1957                                        CTDB_CONTROL_TCP_ADD,
1958                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1959         if (ret != 0) {
1960                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1961                 return -1;
1962         }
1963
1964         return 0;
1965 }
1966
1967 /*
1968   find a tcp address on a list
1969  */
1970 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1971                                            struct ctdb_connection *tcp)
1972 {
1973         int i;
1974
1975         if (array == NULL) {
1976                 return NULL;
1977         }
1978
1979         for (i=0;i<array->num;i++) {
1980                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1981                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1982                         return &array->connections[i];
1983                 }
1984         }
1985         return NULL;
1986 }
1987
1988
1989
1990 /*
1991   called by a daemon to inform us of a TCP connection that one of its
1992   clients managing that should tickled with an ACK when IP takeover is
1993   done
1994  */
1995 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1996 {
1997         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1998         struct ctdb_tcp_array *tcparray;
1999         struct ctdb_connection tcp;
2000         struct ctdb_vnn *vnn;
2001
2002         /* If we don't have public IPs, tickles are useless */
2003         if (ctdb->vnn == NULL) {
2004                 return 0;
2005         }
2006
2007         vnn = find_public_ip_vnn(ctdb, &p->dst);
2008         if (vnn == NULL) {
2009                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2010                         ctdb_addr_to_str(&p->dst)));
2011
2012                 return -1;
2013         }
2014
2015
2016         tcparray = vnn->tcp_array;
2017
2018         /* If this is the first tickle */
2019         if (tcparray == NULL) {
2020                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2021                 CTDB_NO_MEMORY(ctdb, tcparray);
2022                 vnn->tcp_array = tcparray;
2023
2024                 tcparray->num = 0;
2025                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2026                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2027
2028                 tcparray->connections[tcparray->num].src = p->src;
2029                 tcparray->connections[tcparray->num].dst = p->dst;
2030                 tcparray->num++;
2031
2032                 if (tcp_update_needed) {
2033                         vnn->tcp_update_needed = true;
2034                 }
2035                 return 0;
2036         }
2037
2038
2039         /* Do we already have this tickle ?*/
2040         tcp.src = p->src;
2041         tcp.dst = p->dst;
2042         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2043                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2044                         ctdb_addr_to_str(&tcp.dst),
2045                         ntohs(tcp.dst.ip.sin_port),
2046                         vnn->pnn));
2047                 return 0;
2048         }
2049
2050         /* A new tickle, we must add it to the array */
2051         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2052                                         struct ctdb_connection,
2053                                         tcparray->num+1);
2054         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2055
2056         tcparray->connections[tcparray->num].src = p->src;
2057         tcparray->connections[tcparray->num].dst = p->dst;
2058         tcparray->num++;
2059
2060         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2061                 ctdb_addr_to_str(&tcp.dst),
2062                 ntohs(tcp.dst.ip.sin_port),
2063                 vnn->pnn));
2064
2065         if (tcp_update_needed) {
2066                 vnn->tcp_update_needed = true;
2067         }
2068
2069         return 0;
2070 }
2071
2072
2073 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2074 {
2075         struct ctdb_connection *tcpp;
2076
2077         if (vnn == NULL) {
2078                 return;
2079         }
2080
2081         /* if the array is empty we cant remove it
2082            and we don't need to do anything
2083          */
2084         if (vnn->tcp_array == NULL) {
2085                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2086                         ctdb_addr_to_str(&conn->dst),
2087                         ntohs(conn->dst.ip.sin_port)));
2088                 return;
2089         }
2090
2091
2092         /* See if we know this connection
2093            if we don't know this connection  then we dont need to do anything
2094          */
2095         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2096         if (tcpp == NULL) {
2097                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2098                         ctdb_addr_to_str(&conn->dst),
2099                         ntohs(conn->dst.ip.sin_port)));
2100                 return;
2101         }
2102
2103
2104         /* We need to remove this entry from the array.
2105            Instead of allocating a new array and copying data to it
2106            we cheat and just copy the last entry in the existing array
2107            to the entry that is to be removed and just shring the 
2108            ->num field
2109          */
2110         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2111         vnn->tcp_array->num--;
2112
2113         /* If we deleted the last entry we also need to remove the entire array
2114          */
2115         if (vnn->tcp_array->num == 0) {
2116                 talloc_free(vnn->tcp_array);
2117                 vnn->tcp_array = NULL;
2118         }               
2119
2120         vnn->tcp_update_needed = true;
2121
2122         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2123                 ctdb_addr_to_str(&conn->src),
2124                 ntohs(conn->src.ip.sin_port)));
2125 }
2126
2127
2128 /*
2129   called by a daemon to inform us of a TCP connection that one of its
2130   clients used are no longer needed in the tickle database
2131  */
2132 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2133 {
2134         struct ctdb_vnn *vnn;
2135         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2136
2137         /* If we don't have public IPs, tickles are useless */
2138         if (ctdb->vnn == NULL) {
2139                 return 0;
2140         }
2141
2142         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2143         if (vnn == NULL) {
2144                 DEBUG(DEBUG_ERR,
2145                       (__location__ " unable to find public address %s\n",
2146                        ctdb_addr_to_str(&conn->dst)));
2147                 return 0;
2148         }
2149
2150         ctdb_remove_connection(vnn, conn);
2151
2152         return 0;
2153 }
2154
2155
2156 /*
2157   Called when another daemon starts - causes all tickles for all
2158   public addresses we are serving to be sent to the new node on the
2159   next check.  This actually causes the next scheduled call to
2160   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2161   doesn't require careful error handling.
2162  */
2163 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2164 {
2165         struct ctdb_vnn *vnn;
2166
2167         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2168                            (unsigned long) pnn));
2169
2170         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2171                 vnn->tcp_update_needed = true;
2172         }
2173
2174         return 0;
2175 }
2176
2177
2178 /*
2179   called when a client structure goes away - hook to remove
2180   elements from the tcp_list in all daemons
2181  */
2182 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2183 {
2184         while (client->tcp_list) {
2185                 struct ctdb_vnn *vnn;
2186                 struct ctdb_tcp_list *tcp = client->tcp_list;
2187                 struct ctdb_connection *conn = &tcp->connection;
2188
2189                 DLIST_REMOVE(client->tcp_list, tcp);
2190
2191                 vnn = find_public_ip_vnn(client->ctdb,
2192                                          &conn->dst);
2193                 if (vnn == NULL) {
2194                         DEBUG(DEBUG_ERR,
2195                               (__location__ " unable to find public address %s\n",
2196                                ctdb_addr_to_str(&conn->dst)));
2197                         continue;
2198                 }
2199
2200                 /* If the IP address is hosted on this node then
2201                  * remove the connection. */
2202                 if (vnn->pnn == client->ctdb->pnn) {
2203                         ctdb_remove_connection(vnn, conn);
2204                 }
2205
2206                 /* Otherwise this function has been called because the
2207                  * server IP address has been released to another node
2208                  * and the client has exited.  This means that we
2209                  * should not delete the connection information.  The
2210                  * takeover node processes connections too. */
2211         }
2212 }
2213
2214
2215 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2216 {
2217         struct ctdb_vnn *vnn;
2218         int count = 0;
2219         TDB_DATA data;
2220
2221         if (ctdb->tunable.disable_ip_failover == 1) {
2222                 return;
2223         }
2224
2225         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2226                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2227                         ctdb_vnn_unassign_iface(ctdb, vnn);
2228                         continue;
2229                 }
2230                 if (!vnn->iface) {
2231                         continue;
2232                 }
2233
2234                 /* Don't allow multiple releases at once.  Some code,
2235                  * particularly ctdb_tickle_sentenced_connections() is
2236                  * not re-entrant */
2237                 if (vnn->update_in_flight) {
2238                         DEBUG(DEBUG_WARNING,
2239                               (__location__
2240                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2241                                     ctdb_addr_to_str(&vnn->public_address),
2242                                     vnn->public_netmask_bits,
2243                                     ctdb_vnn_iface_string(vnn)));
2244                         continue;
2245                 }
2246                 vnn->update_in_flight = true;
2247
2248                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2249                                     ctdb_addr_to_str(&vnn->public_address),
2250                                     vnn->public_netmask_bits,
2251                                     ctdb_vnn_iface_string(vnn)));
2252
2253                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2254                                   ctdb_vnn_iface_string(vnn),
2255                                   ctdb_addr_to_str(&vnn->public_address),
2256                                   vnn->public_netmask_bits);
2257
2258                 data.dptr = (uint8_t *)talloc_strdup(
2259                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2260                 if (data.dptr != NULL) {
2261                         data.dsize = strlen((char *)data.dptr) + 1;
2262                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2263                                                  CTDB_SRVID_RELEASE_IP, data);
2264                         talloc_free(data.dptr);
2265                 }
2266
2267                 ctdb_vnn_unassign_iface(ctdb, vnn);
2268                 vnn->update_in_flight = false;
2269                 count++;
2270         }
2271
2272         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2273 }
2274
2275
2276 /*
2277   get list of public IPs
2278  */
2279 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2280                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2281 {
2282         int i, num, len;
2283         struct ctdb_public_ip_list_old *ips;
2284         struct ctdb_vnn *vnn;
2285         bool only_available = false;
2286
2287         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2288                 only_available = true;
2289         }
2290
2291         /* count how many public ip structures we have */
2292         num = 0;
2293         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2294                 num++;
2295         }
2296
2297         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2298                 num*sizeof(struct ctdb_public_ip);
2299         ips = talloc_zero_size(outdata, len);
2300         CTDB_NO_MEMORY(ctdb, ips);
2301
2302         i = 0;
2303         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2304                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2305                         continue;
2306                 }
2307                 ips->ips[i].pnn  = vnn->pnn;
2308                 ips->ips[i].addr = vnn->public_address;
2309                 i++;
2310         }
2311         ips->num = i;
2312         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2313                 i*sizeof(struct ctdb_public_ip);
2314
2315         outdata->dsize = len;
2316         outdata->dptr  = (uint8_t *)ips;
2317
2318         return 0;
2319 }
2320
2321
2322 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2323                                         struct ctdb_req_control_old *c,
2324                                         TDB_DATA indata,
2325                                         TDB_DATA *outdata)
2326 {
2327         int i, num, len;
2328         ctdb_sock_addr *addr;
2329         struct ctdb_public_ip_info_old *info;
2330         struct ctdb_vnn *vnn;
2331
2332         addr = (ctdb_sock_addr *)indata.dptr;
2333
2334         vnn = find_public_ip_vnn(ctdb, addr);
2335         if (vnn == NULL) {
2336                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2337                                  "'%s'not a public address\n",
2338                                  ctdb_addr_to_str(addr)));
2339                 return -1;
2340         }
2341
2342         /* count how many public ip structures we have */
2343         num = 0;
2344         for (;vnn->ifaces[num];) {
2345                 num++;
2346         }
2347
2348         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2349                 num*sizeof(struct ctdb_iface);
2350         info = talloc_zero_size(outdata, len);
2351         CTDB_NO_MEMORY(ctdb, info);
2352
2353         info->ip.addr = vnn->public_address;
2354         info->ip.pnn = vnn->pnn;
2355         info->active_idx = 0xFFFFFFFF;
2356
2357         for (i=0; vnn->ifaces[i]; i++) {
2358                 struct ctdb_interface *cur;
2359
2360                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2361                 if (cur == NULL) {
2362                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2363                                            vnn->ifaces[i]));
2364                         return -1;
2365                 }
2366                 if (vnn->iface == cur) {
2367                         info->active_idx = i;
2368                 }
2369                 strncpy(info->ifaces[i].name, cur->name,
2370                         sizeof(info->ifaces[i].name));
2371                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2372                 info->ifaces[i].link_state = cur->link_up;
2373                 info->ifaces[i].references = cur->references;
2374         }
2375         info->num = i;
2376         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2377                 i*sizeof(struct ctdb_iface);
2378
2379         outdata->dsize = len;
2380         outdata->dptr  = (uint8_t *)info;
2381
2382         return 0;
2383 }
2384
2385 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2386                                 struct ctdb_req_control_old *c,
2387                                 TDB_DATA *outdata)
2388 {
2389         int i, num, len;
2390         struct ctdb_iface_list_old *ifaces;
2391         struct ctdb_interface *cur;
2392
2393         /* count how many public ip structures we have */
2394         num = 0;
2395         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2396                 num++;
2397         }
2398
2399         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2400                 num*sizeof(struct ctdb_iface);
2401         ifaces = talloc_zero_size(outdata, len);
2402         CTDB_NO_MEMORY(ctdb, ifaces);
2403
2404         i = 0;
2405         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2406                 strncpy(ifaces->ifaces[i].name, cur->name,
2407                         sizeof(ifaces->ifaces[i].name));
2408                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2409                 ifaces->ifaces[i].link_state = cur->link_up;
2410                 ifaces->ifaces[i].references = cur->references;
2411                 i++;
2412         }
2413         ifaces->num = i;
2414         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2415                 i*sizeof(struct ctdb_iface);
2416
2417         outdata->dsize = len;
2418         outdata->dptr  = (uint8_t *)ifaces;
2419
2420         return 0;
2421 }
2422
2423 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2424                                     struct ctdb_req_control_old *c,
2425                                     TDB_DATA indata)
2426 {
2427         struct ctdb_iface *info;
2428         struct ctdb_interface *iface;
2429         bool link_up = false;
2430
2431         info = (struct ctdb_iface *)indata.dptr;
2432
2433         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2434                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2435                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2436                                   len, len, info->name));
2437                 return -1;
2438         }
2439
2440         switch (info->link_state) {
2441         case 0:
2442                 link_up = false;
2443                 break;
2444         case 1:
2445                 link_up = true;
2446                 break;
2447         default:
2448                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2449                                   (unsigned int)info->link_state));
2450                 return -1;
2451         }
2452
2453         if (info->references != 0) {
2454                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2455                                   (unsigned int)info->references));
2456                 return -1;
2457         }
2458
2459         iface = ctdb_find_iface(ctdb, info->name);
2460         if (iface == NULL) {
2461                 return -1;
2462         }
2463
2464         if (link_up == iface->link_up) {
2465                 return 0;
2466         }
2467
2468         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2469               ("iface[%s] has changed it's link status %s => %s\n",
2470                iface->name,
2471                iface->link_up?"up":"down",
2472                link_up?"up":"down"));
2473
2474         iface->link_up = link_up;
2475         return 0;
2476 }
2477
2478
2479 /*
2480   called by a daemon to inform us of the entire list of TCP tickles for
2481   a particular public address.
2482   this control should only be sent by the node that is currently serving
2483   that public address.
2484  */
2485 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2486 {
2487         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2488         struct ctdb_tcp_array *tcparray;
2489         struct ctdb_vnn *vnn;
2490
2491         /* We must at least have tickles.num or else we cant verify the size
2492            of the received data blob
2493          */
2494         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2495                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2496                 return -1;
2497         }
2498
2499         /* verify that the size of data matches what we expect */
2500         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2501                          + sizeof(struct ctdb_connection) * list->num) {
2502                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2503                 return -1;
2504         }
2505
2506         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2507                            ctdb_addr_to_str(&list->addr)));
2508
2509         vnn = find_public_ip_vnn(ctdb, &list->addr);
2510         if (vnn == NULL) {
2511                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2512                         ctdb_addr_to_str(&list->addr)));
2513
2514                 return 1;
2515         }
2516
2517         if (vnn->pnn == ctdb->pnn) {
2518                 DEBUG(DEBUG_INFO,
2519                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2520                        ctdb_addr_to_str(&list->addr)));
2521                 return 0;
2522         }
2523
2524         /* remove any old ticklelist we might have */
2525         talloc_free(vnn->tcp_array);
2526         vnn->tcp_array = NULL;
2527
2528         tcparray = talloc(vnn, struct ctdb_tcp_array);
2529         CTDB_NO_MEMORY(ctdb, tcparray);
2530
2531         tcparray->num = list->num;
2532
2533         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2534         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2535
2536         memcpy(tcparray->connections, &list->connections[0],
2537                sizeof(struct ctdb_connection)*tcparray->num);
2538
2539         /* We now have a new fresh tickle list array for this vnn */
2540         vnn->tcp_array = tcparray;
2541
2542         return 0;
2543 }
2544
2545 /*
2546   called to return the full list of tickles for the puclic address associated 
2547   with the provided vnn
2548  */
2549 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2550 {
2551         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2552         struct ctdb_tickle_list_old *list;
2553         struct ctdb_tcp_array *tcparray;
2554         int num, i;
2555         struct ctdb_vnn *vnn;
2556         unsigned port;
2557
2558         vnn = find_public_ip_vnn(ctdb, addr);
2559         if (vnn == NULL) {
2560                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2561                         ctdb_addr_to_str(addr)));
2562
2563                 return 1;
2564         }
2565
2566         port = ctdb_addr_to_port(addr);
2567
2568         tcparray = vnn->tcp_array;
2569         num = 0;
2570         if (tcparray != NULL) {
2571                 if (port == 0) {
2572                         /* All connections */
2573                         num = tcparray->num;
2574                 } else {
2575                         /* Count connections for port */
2576                         for (i = 0; i < tcparray->num; i++) {
2577                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2578                                         num++;
2579                                 }
2580                         }
2581                 }
2582         }
2583
2584         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2585                         + sizeof(struct ctdb_connection) * num;
2586
2587         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2588         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2589         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2590
2591         list->addr = *addr;
2592         list->num = num;
2593
2594         if (num == 0) {
2595                 return 0;
2596         }
2597
2598         num = 0;
2599         for (i = 0; i < tcparray->num; i++) {
2600                 if (port == 0 || \
2601                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2602                         list->connections[num] = tcparray->connections[i];
2603                         num++;
2604                 }
2605         }
2606
2607         return 0;
2608 }
2609
2610
2611 /*
2612   set the list of all tcp tickles for a public address
2613  */
2614 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2615                                             ctdb_sock_addr *addr,
2616                                             struct ctdb_tcp_array *tcparray)
2617 {
2618         int ret, num;
2619         TDB_DATA data;
2620         struct ctdb_tickle_list_old *list;
2621
2622         if (tcparray) {
2623                 num = tcparray->num;
2624         } else {
2625                 num = 0;
2626         }
2627
2628         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2629                         sizeof(struct ctdb_connection) * num;
2630         data.dptr = talloc_size(ctdb, data.dsize);
2631         CTDB_NO_MEMORY(ctdb, data.dptr);
2632
2633         list = (struct ctdb_tickle_list_old *)data.dptr;
2634         list->addr = *addr;
2635         list->num = num;
2636         if (tcparray) {
2637                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2638         }
2639
2640         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2641                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2642                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2643         if (ret != 0) {
2644                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2645                 return -1;
2646         }
2647
2648         talloc_free(data.dptr);
2649
2650         return ret;
2651 }
2652
2653
2654 /*
2655   perform tickle updates if required
2656  */
2657 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2658                                     struct tevent_timer *te,
2659                                     struct timeval t, void *private_data)
2660 {
2661         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2662         int ret;
2663         struct ctdb_vnn *vnn;
2664
2665         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2666                 /* we only send out updates for public addresses that 
2667                    we have taken over
2668                  */
2669                 if (ctdb->pnn != vnn->pnn) {
2670                         continue;
2671                 }
2672                 /* We only send out the updates if we need to */
2673                 if (!vnn->tcp_update_needed) {
2674                         continue;
2675                 }
2676                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2677                                                        &vnn->public_address,
2678                                                        vnn->tcp_array);
2679                 if (ret != 0) {
2680                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2681                                 ctdb_addr_to_str(&vnn->public_address)));
2682                 } else {
2683                         DEBUG(DEBUG_INFO,
2684                               ("Sent tickle update for public address %s\n",
2685                                ctdb_addr_to_str(&vnn->public_address)));
2686                         vnn->tcp_update_needed = false;
2687                 }
2688         }
2689
2690         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2691                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2692                          ctdb_update_tcp_tickles, ctdb);
2693 }
2694
2695 /*
2696   start periodic update of tcp tickles
2697  */
2698 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2699 {
2700         ctdb->tickle_update_context = talloc_new(ctdb);
2701
2702         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2703                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2704                          ctdb_update_tcp_tickles, ctdb);
2705 }
2706
2707
2708
2709
2710 struct control_gratious_arp {
2711         struct ctdb_context *ctdb;
2712         ctdb_sock_addr addr;
2713         const char *iface;
2714         int count;
2715 };
2716
2717 /*
2718   send a control_gratuitous arp
2719  */
2720 static void send_gratious_arp(struct tevent_context *ev,
2721                               struct tevent_timer *te,
2722                               struct timeval t, void *private_data)
2723 {
2724         int ret;
2725         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2726                                                         struct control_gratious_arp);
2727
2728         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2729         if (ret != 0) {
2730                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2731                                  arp->iface, strerror(errno)));
2732         }
2733
2734
2735         arp->count++;
2736         if (arp->count == CTDB_ARP_REPEAT) {
2737                 talloc_free(arp);
2738                 return;
2739         }
2740
2741         tevent_add_timer(arp->ctdb->ev, arp,
2742                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2743                          send_gratious_arp, arp);
2744 }
2745
2746
2747 /*
2748   send a gratious arp 
2749  */
2750 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2751 {
2752         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2753         struct control_gratious_arp *arp;
2754
2755         /* verify the size of indata */
2756         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2757                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2758                                  (unsigned)indata.dsize, 
2759                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2760                 return -1;
2761         }
2762         if (indata.dsize != 
2763                 ( offsetof(struct ctdb_addr_info_old, iface)
2764                 + gratious_arp->len ) ){
2765
2766                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2767                         "but should be %u bytes\n", 
2768                          (unsigned)indata.dsize, 
2769                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2770                 return -1;
2771         }
2772
2773
2774         arp = talloc(ctdb, struct control_gratious_arp);
2775         CTDB_NO_MEMORY(ctdb, arp);
2776
2777         arp->ctdb  = ctdb;
2778         arp->addr   = gratious_arp->addr;
2779         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2780         CTDB_NO_MEMORY(ctdb, arp->iface);
2781         arp->count = 0;
2782
2783         tevent_add_timer(arp->ctdb->ev, arp,
2784                          timeval_zero(), send_gratious_arp, arp);
2785
2786         return 0;
2787 }
2788
2789 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2790 {
2791         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2792         int ret;
2793
2794         /* verify the size of indata */
2795         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2796                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2797                 return -1;
2798         }
2799         if (indata.dsize != 
2800                 ( offsetof(struct ctdb_addr_info_old, iface)
2801                 + pub->len ) ){
2802
2803                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2804                         "but should be %u bytes\n", 
2805                          (unsigned)indata.dsize, 
2806                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2807                 return -1;
2808         }
2809
2810         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2811
2812         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2813
2814         if (ret != 0) {
2815                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2816                 return -1;
2817         }
2818
2819         return 0;
2820 }
2821
/*
  state handed to delete_ip_callback()
 */
struct delete_ip_callback_state {
	struct ctdb_req_control_old *c;	/* request that delete_ip_callback() replies to */
};
2825
2826 /*
2827   called when releaseip event finishes for del_public_address
2828  */
2829 static void delete_ip_callback(struct ctdb_context *ctdb,
2830                                int32_t status, TDB_DATA data,
2831                                const char *errormsg,
2832                                void *private_data)
2833 {
2834         struct delete_ip_callback_state *state =
2835                 talloc_get_type(private_data, struct delete_ip_callback_state);
2836
2837         /* If release failed then fail. */
2838         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2839         talloc_free(private_data);
2840 }
2841
2842 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2843                                         struct ctdb_req_control_old *c,
2844                                         TDB_DATA indata, bool *async_reply)
2845 {
2846         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2847         struct ctdb_vnn *vnn;
2848
2849         /* verify the size of indata */
2850         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2851                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2852                 return -1;
2853         }
2854         if (indata.dsize != 
2855                 ( offsetof(struct ctdb_addr_info_old, iface)
2856                 + pub->len ) ){
2857
2858                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2859                         "but should be %u bytes\n", 
2860                          (unsigned)indata.dsize, 
2861                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2862                 return -1;
2863         }
2864
2865         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2866
2867         /* walk over all public addresses until we find a match */
2868         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2869                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2870                         if (vnn->pnn == ctdb->pnn) {
2871                                 struct delete_ip_callback_state *state;
2872                                 struct ctdb_public_ip *ip;
2873                                 TDB_DATA data;
2874                                 int ret;
2875
2876                                 vnn->delete_pending = true;
2877
2878                                 state = talloc(ctdb,
2879                                                struct delete_ip_callback_state);
2880                                 CTDB_NO_MEMORY(ctdb, state);
2881                                 state->c = c;
2882
2883                                 ip = talloc(state, struct ctdb_public_ip);
2884                                 if (ip == NULL) {
2885                                         DEBUG(DEBUG_ERR,
2886                                               (__location__ " Out of memory\n"));
2887                                         talloc_free(state);
2888                                         return -1;
2889                                 }
2890                                 ip->pnn = -1;
2891                                 ip->addr = pub->addr;
2892
2893                                 data.dsize = sizeof(struct ctdb_public_ip);
2894                                 data.dptr = (unsigned char *)ip;
2895
2896                                 ret = ctdb_daemon_send_control(ctdb,
2897                                                                ctdb_get_pnn(ctdb),
2898                                                                0,
2899                                                                CTDB_CONTROL_RELEASE_IP,
2900                                                                0, 0,
2901                                                                data,
2902                                                                delete_ip_callback,
2903                                                                state);
2904                                 if (ret == -1) {
2905                                         DEBUG(DEBUG_ERR,
2906                                               (__location__ "Unable to send "
2907                                                "CTDB_CONTROL_RELEASE_IP\n"));
2908                                         talloc_free(state);
2909                                         return -1;
2910                                 }
2911
2912                                 state->c = talloc_steal(state, c);
2913                                 *async_reply = true;
2914                         } else {
2915                                 /* This IP is not hosted on the
2916                                  * current node so just delete it
2917                                  * now. */
2918                                 do_delete_ip(ctdb, vnn);
2919                         }
2920
2921                         return 0;
2922                 }
2923         }
2924
2925         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2926                          ctdb_addr_to_str(&pub->addr)));
2927         return -1;
2928 }
2929
2930
/*
  state handed to ctdb_ipreallocated_callback()
 */
struct ipreallocated_callback_state {
	struct ctdb_req_control_old *c;	/* request answered when the event finishes */
};
2934
2935 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2936                                         int status, void *p)
2937 {
2938         struct ipreallocated_callback_state *state =
2939                 talloc_get_type(p, struct ipreallocated_callback_state);
2940
2941         if (status != 0) {
2942                 DEBUG(DEBUG_ERR,
2943                       (" \"ipreallocated\" event script failed (status %d)\n",
2944                        status));
2945                 if (status == -ETIME) {
2946                         ctdb_ban_self(ctdb);
2947                 }
2948         }
2949
2950         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2951         talloc_free(state);
2952 }
2953
2954 /* A control to run the ipreallocated event */
2955 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2956                                    struct ctdb_req_control_old *c,
2957                                    bool *async_reply)
2958 {
2959         int ret;
2960         struct ipreallocated_callback_state *state;
2961
2962         state = talloc(ctdb, struct ipreallocated_callback_state);
2963         CTDB_NO_MEMORY(ctdb, state);
2964
2965         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2966
2967         ret = ctdb_event_script_callback(ctdb, state,
2968                                          ctdb_ipreallocated_callback, state,
2969                                          CTDB_EVENT_IPREALLOCATED,
2970                                          "%s", "");
2971
2972         if (ret != 0) {
2973                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2974                 talloc_free(state);
2975                 return -1;
2976         }
2977
2978         /* tell the control that we will be reply asynchronously */
2979         state->c    = talloc_steal(state, c);
2980         *async_reply = true;
2981
2982         return 0;
2983 }
2984
2985
2986 /* This function is called from the recovery daemon to verify that a remote
2987    node has the expected ip allocation.
2988    This is verified against ctdb->ip_tree
2989 */
2990 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
2991                                        struct ctdb_public_ip_list *ips,
2992                                        uint32_t pnn)
2993 {
2994         struct public_ip_list *tmp_ip;
2995         int i;
2996
2997         if (ctdb->ip_tree == NULL) {
2998                 /* don't know the expected allocation yet, assume remote node
2999                    is correct. */
3000                 return 0;
3001         }
3002
3003         if (ips == NULL) {
3004                 return 0;
3005         }
3006
3007         for (i=0; i<ips->num; i++) {
3008                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3009                 if (tmp_ip == NULL) {
3010                         DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3011                         return -1;
3012                 }
3013
3014                 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3015                         continue;
3016                 }
3017
3018                 if (tmp_ip->pnn != ips->ip[i].pnn) {
3019                         DEBUG(DEBUG_ERR,
3020                               ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3021                                pnn,
3022                                ctdb_addr_to_str(&ips->ip[i].addr),
3023                                ips->ip[i].pnn, tmp_ip->pnn));
3024                         return -1;
3025                 }
3026         }
3027
3028         return 0;
3029 }
3030
3031 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3032 {
3033         struct public_ip_list *tmp_ip;
3034
3035         /* IP tree is never built if DisableIPFailover is set */
3036         if (ctdb->tunable.disable_ip_failover != 0) {
3037                 return 0;
3038         }
3039
3040         if (ctdb->ip_tree == NULL) {
3041                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3042                 return -1;
3043         }
3044
3045         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3046         if (tmp_ip == NULL) {
3047                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3048                 return -1;
3049         }
3050
3051         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3052         tmp_ip->pnn = ip->pnn;
3053
3054         return 0;
3055 }
3056
3057 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3058 {
3059         TALLOC_FREE(ctdb->ip_tree);
3060 }
3061
/*
  state of an in-progress reloadips operation
 */
struct ctdb_reloadips_handle {
	struct ctdb_context *ctdb;	/* owning daemon context */
	struct ctdb_req_control_old *c;	/* request answered by the destructor, if set */
	int status;			/* result sent back in that reply */
	int fd[2];			/* fd[0] is read by ctdb_reloadips_child_handler() */
	pid_t child;			/* process sent SIGKILL by the destructor */
	struct tevent_fd *fde;
};
3070
3071 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3072 {
3073         if (h == h->ctdb->reload_ips) {
3074                 h->ctdb->reload_ips = NULL;
3075         }
3076         if (h->c != NULL) {
3077                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3078                 h->c = NULL;
3079         }
3080         ctdb_kill(h->ctdb, h->child, SIGKILL);
3081         return 0;
3082 }
3083
3084 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3085                                          struct tevent_timer *te,
3086                                          struct timeval t, void *private_data)
3087 {
3088         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3089
3090         talloc_free(h);
3091 }
3092
3093 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3094                                          struct tevent_fd *fde,
3095                                          uint16_t flags, void *private_data)
3096 {
3097         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3098
3099         char res;
3100         int ret;
3101
3102         ret = sys_read(h->fd[0], &res, 1);
3103         if (ret < 1 || res != 0) {
3104                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3105                 res = 1;
3106         }
3107         h->status = res;
3108
3109         talloc_free(h);
3110 }
3111
3112 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3113 {
3114         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3115         struct ctdb_public_ip_list_old *ips;
3116         struct ctdb_vnn *vnn;
3117         struct client_async_data *async_data;
3118         struct timeval timeout;
3119         TDB_DATA data;
3120         struct ctdb_client_control_state *state;
3121         bool first_add;
3122         int i, ret;
3123
3124         CTDB_NO_MEMORY(ctdb, mem_ctx);
3125
3126         /* Read IPs from local node */
3127         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3128                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3129         if (ret != 0) {
3130                 DEBUG(DEBUG_ERR,
3131                       ("Unable to fetch public IPs from local node\n"));
3132                 talloc_free(mem_ctx);
3133                 return -1;
3134         }
3135
3136         /* Read IPs file - this is safe since this is a child process */
3137         ctdb->vnn = NULL;
3138         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3139                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3140                 talloc_free(mem_ctx);
3141                 return -1;
3142         }
3143
3144         async_data = talloc_zero(mem_ctx, struct client_async_data);
3145         CTDB_NO_MEMORY(ctdb, async_data);
3146
3147         /* Compare IPs between node and file for IPs to be deleted */
3148         for (i = 0; i < ips->num; i++) {
3149                 /* */
3150                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3151                         if (ctdb_same_ip(&vnn->public_address,
3152                                          &ips->ips[i].addr)) {
3153                                 /* IP is still in file */
3154                                 break;
3155                         }
3156                 }
3157
3158                 if (vnn == NULL) {
3159                         /* Delete IP ips->ips[i] */
3160                         struct ctdb_addr_info_old *pub;
3161
3162                         DEBUG(DEBUG_NOTICE,
3163                               ("IP %s no longer configured, deleting it\n",
3164                                ctdb_addr_to_str(&ips->ips[i].addr)));
3165
3166                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3167                         CTDB_NO_MEMORY(ctdb, pub);
3168
3169                         pub->addr  = ips->ips[i].addr;
3170                         pub->mask  = 0;
3171                         pub->len   = 0;
3172
3173                         timeout = TAKEOVER_TIMEOUT();
3174
3175                         data.dsize = offsetof(struct ctdb_addr_info_old,
3176                                               iface) + pub->len;
3177                         data.dptr = (uint8_t *)pub;
3178
3179                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3180                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3181                                                   0, data, async_data,
3182                                                   &timeout, NULL);
3183                         if (state == NULL) {
3184                                 DEBUG(DEBUG_ERR,
3185                                       (__location__
3186                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3187                                 goto failed;
3188                         }
3189
3190                         ctdb_client_async_add(async_data, state);
3191                 }
3192         }
3193
3194         /* Compare IPs between node and file for IPs to be added */
3195         first_add = true;
3196         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3197                 for (i = 0; i < ips->num; i++) {
3198                         if (ctdb_same_ip(&vnn->public_address,
3199                                          &ips->ips[i].addr)) {
3200                                 /* IP already on node */
3201                                 break;
3202                         }
3203                 }
3204                 if (i == ips->num) {
3205                         /* Add IP ips->ips[i] */
3206                         struct ctdb_addr_info_old *pub;
3207                         const char *ifaces = NULL;
3208                         uint32_t len;
3209                         int iface = 0;
3210
3211                         DEBUG(DEBUG_NOTICE,
3212                               ("New IP %s configured, adding it\n",
3213                                ctdb_addr_to_str(&vnn->public_address)));
3214                         if (first_add) {
3215                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3216
3217                                 data.dsize = sizeof(pnn);
3218                                 data.dptr  = (uint8_t *)&pnn;
3219
3220                                 ret = ctdb_client_send_message(
3221                                         ctdb,
3222                                         CTDB_BROADCAST_CONNECTED,
3223                                         CTDB_SRVID_REBALANCE_NODE,
3224                                         data);
3225                                 if (ret != 0) {
3226                                         DEBUG(DEBUG_WARNING,
3227                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3228                                 }
3229
3230                                 first_add = false;
3231                         }
3232
3233                         ifaces = vnn->ifaces[0];
3234                         iface = 1;
3235                         while (vnn->ifaces[iface] != NULL) {
3236                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3237                                                          vnn->ifaces[iface]);
3238                                 iface++;
3239                         }
3240
3241                         len   = strlen(ifaces) + 1;
3242                         pub = talloc_zero_size(mem_ctx,
3243                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3244                         CTDB_NO_MEMORY(ctdb, pub);
3245
3246                         pub->addr  = vnn->public_address;
3247                         pub->mask  = vnn->public_netmask_bits;
3248                         pub->len   = len;
3249                         memcpy(&pub->iface[0], ifaces, pub->len);
3250
3251                         timeout = TAKEOVER_TIMEOUT();
3252
3253                         data.dsize = offsetof(struct ctdb_addr_info_old,
3254                                               iface) + pub->len;
3255                         data.dptr = (uint8_t *)pub;
3256
3257                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3258                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3259                                                   0, data, async_data,
3260                                                   &timeout, NULL);
3261                         if (state == NULL) {
3262                                 DEBUG(DEBUG_ERR,
3263                                       (__location__
3264                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3265                                 goto failed;
3266                         }
3267
3268                         ctdb_client_async_add(async_data, state);
3269                 }
3270         }
3271
3272         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3273                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3274                 goto failed;
3275         }
3276
3277         talloc_free(mem_ctx);
3278         return 0;
3279
3280 failed:
3281         talloc_free(mem_ctx);
3282         return -1;
3283 }
3284
3285 /* This control is sent to force the node to re-read the public addresses file
3286    and drop any addresses we should nnot longer host, and add new addresses
3287    that we are now able to host
3288 */
3289 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3290 {
3291         struct ctdb_reloadips_handle *h;
3292         pid_t parent = getpid();
3293
3294         if (ctdb->reload_ips != NULL) {
3295                 talloc_free(ctdb->reload_ips);
3296                 ctdb->reload_ips = NULL;
3297         }
3298
3299         h = talloc(ctdb, struct ctdb_reloadips_handle);
3300         CTDB_NO_MEMORY(ctdb, h);
3301         h->ctdb     = ctdb;
3302         h->c        = NULL;
3303         h->status   = -1;
3304         
3305         if (pipe(h->fd) == -1) {
3306                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3307                 talloc_free(h);
3308                 return -1;
3309         }
3310
3311         h->child = ctdb_fork(ctdb);
3312         if (h->child == (pid_t)-1) {
3313                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3314                 close(h->fd[0]);
3315                 close(h->fd[1]);
3316                 talloc_free(h);
3317                 return -1;
3318         }
3319
3320         /* child process */
3321         if (h->child == 0) {
3322                 signed char res = 0;
3323
3324                 close(h->fd[0]);
3325                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3326
3327                 prctl_set_comment("ctdb_reloadips");
3328                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3329                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3330                         res = -1;
3331                 } else {
3332                         res = ctdb_reloadips_child(ctdb);
3333                         if (res != 0) {
3334                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3335                         }
3336                 }
3337
3338                 sys_write(h->fd[1], &res, 1);
3339                 ctdb_wait_for_process_to_exit(parent);
3340                 _exit(0);
3341         }
3342
3343         h->c             = talloc_steal(h, c);
3344
3345         close(h->fd[1]);
3346         set_close_on_exec(h->fd[0]);
3347
3348         talloc_set_destructor(h, ctdb_reloadips_destructor);
3349
3350
3351         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3352                                ctdb_reloadips_child_handler, (void *)h);
3353         tevent_fd_set_auto_close(h->fde);
3354
3355         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3356                          ctdb_reloadips_timeout_event, h);
3357
3358         /* we reply later */
3359         *async_reply = true;
3360         return 0;
3361 }