ctdb-ipalloc: Drop remote IP verification
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/* Timeout value derived from the takeover_timeout tunable.  Expands to
 * an expression that reads a variable named "ctdb" at the point of use. */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* Gratuitous ARP schedule: seconds component of the delay between
 * rounds, and the number of rounds after which announcing stops. */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
50
/* A local network interface, kept on the doubly linked list headed by
 * ctdb->ifaces. */
struct ctdb_interface {
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	const char *name;	/* interface name, allocated off this struct */
	bool link_up;		/* interfaces are created with this set */
	uint32_t references;	/* count of VNNs currently assigned here */
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
285 struct ctdb_takeover_arp {
286         struct ctdb_context *ctdb;
287         uint32_t count;
288         ctdb_sock_addr addr;
289         struct ctdb_tcp_array *tcparray;
290         struct ctdb_vnn *vnn;
291 };
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
403 struct takeover_callback_state {
404         struct ctdb_req_control_old *c;
405         ctdb_sock_addr *addr;
406         struct ctdb_vnn *vnn;
407 };
408
/* State handed to ctdb_do_takeip_callback(). */
struct ctdb_do_takeip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_vnn *vnn;		/* address being taken over */
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/* State handed to ctdb_do_updateip_callback(). */
struct ctdb_do_updateip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_interface *old;	/* interface the address is moving from */
	struct ctdb_vnn *vnn;		/* address being moved */
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
812 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
813 {
814         DLIST_REMOVE(ctdb->vnn, vnn);
815         ctdb_vnn_unassign_iface(ctdb, vnn);
816         ctdb_remove_orphaned_ifaces(ctdb, vnn);
817         talloc_free(vnn);
818 }
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static void *add_ip_callback(void *parm, void *data)
1133 {
1134         struct public_ip_list *this_ip = parm;
1135         struct public_ip_list *prev_ip = data;
1136
1137         if (prev_ip == NULL) {
1138                 return parm;
1139         }
1140         if (this_ip->pnn == -1) {
1141                 this_ip->pnn = prev_ip->pnn;
1142         }
1143
1144         return parm;
1145 }
1146
1147 static int getips_count_callback(void *param, void *data)
1148 {
1149         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1151
1152         new_ip->next = *ip_list;
1153         *ip_list     = new_ip;
1154         return 0;
1155 }
1156
1157 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1158                                          struct ipalloc_state *ipalloc_state,
1159                                          struct ctdb_node_map_old *nodemap)
1160 {
1161         int j;
1162         int ret;
1163         struct ctdb_public_ip_list_old *ip_list;
1164
1165         if (ipalloc_state->num != nodemap->num) {
1166                 DEBUG(DEBUG_ERR,
1167                       (__location__
1168                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1169                        ipalloc_state->num, nodemap->num));
1170                 return -1;
1171         }
1172
1173         for (j=0; j<nodemap->num; j++) {
1174                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1175                         continue;
1176                 }
1177
1178                 /* Retrieve the list of known public IPs from the node */
1179                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1180                                         TAKEOVER_TIMEOUT(),
1181                                         j,
1182                                         ipalloc_state->known_public_ips,
1183                                         0,
1184                                         &ip_list);
1185                 if (ret != 0) {
1186                         DEBUG(DEBUG_ERR,
1187                               ("Failed to read known public IPs from node: %u\n",
1188                                j));
1189                         return -1;
1190                 }
1191                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1192                 /* This could be copied and freed.  However, ip_list
1193                  * is allocated off ipalloc_state->known_public_ips,
1194                  * so this is a safe hack.  This will go away in a
1195                  * while anyway... */
1196                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1197
1198                 /* Retrieve the list of available public IPs from the node */
1199                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1200                                         TAKEOVER_TIMEOUT(),
1201                                         j,
1202                                         ipalloc_state->available_public_ips,
1203                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1204                                         &ip_list);
1205                 if (ret != 0) {
1206                         DEBUG(DEBUG_ERR,
1207                               ("Failed to read available public IPs from node: %u\n",
1208                                j));
1209                         return -1;
1210                 }
1211                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1212                 /* This could be copied and freed.  However, ip_list
1213                  * is allocated off ipalloc_state->available_public_ips,
1214                  * so this is a safe hack.  This will go away in a
1215                  * while anyway... */
1216                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1217         }
1218
1219         return 0;
1220 }
1221
1222 static struct public_ip_list *
1223 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1224 {
1225         int i, j;
1226         struct public_ip_list *ip_list;
1227         struct ctdb_public_ip_list *public_ips;
1228
1229         TALLOC_FREE(ctdb->ip_tree);
1230         ctdb->ip_tree = trbt_create(ctdb, 0);
1231
1232         if (ipalloc_state->known_public_ips == NULL) {
1233                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1234                 return NULL;
1235         }
1236
1237         for (i=0; i < ipalloc_state->num; i++) {
1238
1239                 public_ips = &ipalloc_state->known_public_ips[i];
1240
1241                 for (j=0; j < public_ips->num; j++) {
1242                         struct public_ip_list *tmp_ip;
1243
1244                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1245                         if (tmp_ip == NULL) {
1246                                 DEBUG(DEBUG_ERR,
1247                                       (__location__ " out of memory\n"));
1248                                 return NULL;
1249                         }
1250
1251                         /* Do not use information about IP addresses hosted
1252                          * on other nodes, it may not be accurate */
1253                         if (public_ips->ip[j].pnn == i) {
1254                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1255                         } else {
1256                                 tmp_ip->pnn = -1;
1257                         }
1258                         tmp_ip->addr = public_ips->ip[j].addr;
1259                         tmp_ip->next = NULL;
1260
1261                         trbt_insertarray32_callback(ctdb->ip_tree,
1262                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1263                                 add_ip_callback,
1264                                 tmp_ip);
1265                 }
1266         }
1267
1268         ip_list = NULL;
1269         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1270
1271         return ip_list;
1272 }
1273
1274 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1275 {
1276         int i;
1277
1278         for (i=0;i<nodemap->num;i++) {
1279                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1280                         /* Found one completely healthy node */
1281                         return false;
1282                 }
1283         }
1284
1285         return true;
1286 }
1287
/* State shared by the reply and failure callbacks of a tunable fetch */
struct get_tunable_callback_data {
	/* Name of the tunable being fetched; appears in log messages */
	const char *tunable;
	/* talloc array receiving one value per node, indexed by PNN */
	uint32_t *out;
	/* Set by the callbacks on errors that invalidate the result */
	bool fatal;
};
1293
1294 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1295                                  int32_t res, TDB_DATA outdata,
1296                                  void *callback)
1297 {
1298         struct get_tunable_callback_data *cd =
1299                 (struct get_tunable_callback_data *)callback;
1300         int size;
1301
1302         if (res != 0) {
1303                 /* Already handled in fail callback */
1304                 return;
1305         }
1306
1307         if (outdata.dsize != sizeof(uint32_t)) {
1308                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1309                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1310                                  (int)outdata.dsize));
1311                 cd->fatal = true;
1312                 return;
1313         }
1314
1315         size = talloc_array_length(cd->out);
1316         if (pnn >= size) {
1317                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1318                                  cd->tunable, pnn, size));
1319                 return;
1320         }
1321
1322                 
1323         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1324 }
1325
1326 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1327                                        int32_t res, TDB_DATA outdata,
1328                                        void *callback)
1329 {
1330         struct get_tunable_callback_data *cd =
1331                 (struct get_tunable_callback_data *)callback;
1332
1333         switch (res) {
1334         case -ETIME:
1335                 DEBUG(DEBUG_ERR,
1336                       ("Timed out getting tunable \"%s\" from node %d\n",
1337                        cd->tunable, pnn));
1338                 cd->fatal = true;
1339                 break;
1340         case -EINVAL:
1341         case -1:
1342                 DEBUG(DEBUG_WARNING,
1343                       ("Tunable \"%s\" not implemented on node %d\n",
1344                        cd->tunable, pnn));
1345                 break;
1346         default:
1347                 DEBUG(DEBUG_ERR,
1348                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1349                        cd->tunable, pnn));
1350                 cd->fatal = true;
1351         }
1352 }
1353
1354 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1355                                         TALLOC_CTX *tmp_ctx,
1356                                         struct ctdb_node_map_old *nodemap,
1357                                         const char *tunable,
1358                                         uint32_t default_value)
1359 {
1360         TDB_DATA data;
1361         struct ctdb_control_get_tunable *t;
1362         uint32_t *nodes;
1363         uint32_t *tvals;
1364         struct get_tunable_callback_data callback_data;
1365         int i;
1366
1367         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1368         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1369         for (i=0; i<nodemap->num; i++) {
1370                 tvals[i] = default_value;
1371         }
1372                 
1373         callback_data.out = tvals;
1374         callback_data.tunable = tunable;
1375         callback_data.fatal = false;
1376
1377         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1378         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1379         t = (struct ctdb_control_get_tunable *)data.dptr;
1380         t->length = strlen(tunable)+1;
1381         memcpy(t->name, tunable, t->length);
1382         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1383         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1384                                       nodes, 0, TAKEOVER_TIMEOUT(),
1385                                       false, data,
1386                                       get_tunable_callback,
1387                                       get_tunable_fail_callback,
1388                                       &callback_data) != 0) {
1389                 if (callback_data.fatal) {
1390                         talloc_free(tvals);
1391                         tvals = NULL;
1392                 }
1393         }
1394         talloc_free(nodes);
1395         talloc_free(data.dptr);
1396
1397         return tvals;
1398 }
1399
1400 /* Set internal flags for IP allocation:
1401  *   Clear ip flags
1402  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1403  *   Set NOIPHOST ip flag for each INACTIVE node
1404  *   if all nodes are disabled:
1405  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1406  *   else
1407  *     Set NOIPHOST ip flags for disabled nodes
1408  */
1409 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1410                                  struct ctdb_node_map_old *nodemap,
1411                                  uint32_t *tval_noiptakeover,
1412                                  uint32_t *tval_noiphostonalldisabled)
1413 {
1414         int i;
1415
1416         for (i=0;i<nodemap->num;i++) {
1417                 /* Can not take IPs on node with NoIPTakeover set */
1418                 if (tval_noiptakeover[i] != 0) {
1419                         ipalloc_state->noiptakeover[i] = true;
1420                 }
1421
1422                 /* Can not host IPs on INACTIVE node */
1423                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1424                         ipalloc_state->noiphost[i] = true;
1425                 }
1426         }
1427
1428         if (all_nodes_are_disabled(nodemap)) {
1429                 /* If all nodes are disabled, can not host IPs on node
1430                  * with NoIPHostOnAllDisabled set
1431                  */
1432                 for (i=0;i<nodemap->num;i++) {
1433                         if (tval_noiphostonalldisabled[i] != 0) {
1434                                 ipalloc_state->noiphost[i] = true;
1435                         }
1436                 }
1437         } else {
1438                 /* If some nodes are not disabled, then can not host
1439                  * IPs on DISABLED node
1440                  */
1441                 for (i=0;i<nodemap->num;i++) {
1442                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1443                                 ipalloc_state->noiphost[i] = true;
1444                         }
1445                 }
1446         }
1447 }
1448
1449 static bool set_ipflags(struct ctdb_context *ctdb,
1450                         struct ipalloc_state *ipalloc_state,
1451                         struct ctdb_node_map_old *nodemap)
1452 {
1453         uint32_t *tval_noiptakeover;
1454         uint32_t *tval_noiphostonalldisabled;
1455
1456         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1457                                                    "NoIPTakeover", 0);
1458         if (tval_noiptakeover == NULL) {
1459                 return false;
1460         }
1461
1462         tval_noiphostonalldisabled =
1463                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1464                                        "NoIPHostOnAllDisabled", 0);
1465         if (tval_noiphostonalldisabled == NULL) {
1466                 /* Caller frees tmp_ctx */
1467                 return false;
1468         }
1469
1470         set_ipflags_internal(ipalloc_state, nodemap,
1471                              tval_noiptakeover,
1472                              tval_noiphostonalldisabled);
1473
1474         talloc_free(tval_noiptakeover);
1475         talloc_free(tval_noiphostonalldisabled);
1476
1477         return true;
1478 }
1479
1480 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1481                                                  TALLOC_CTX *mem_ctx)
1482 {
1483         struct ipalloc_state *ipalloc_state =
1484                 talloc_zero(mem_ctx, struct ipalloc_state);
1485         if (ipalloc_state == NULL) {
1486                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1487                 return NULL;
1488         }
1489
1490         ipalloc_state->num = ctdb->num_nodes;
1491
1492         ipalloc_state->known_public_ips =
1493                 talloc_zero_array(ipalloc_state,
1494                                   struct ctdb_public_ip_list,
1495                                   ipalloc_state->num);
1496         if (ipalloc_state->known_public_ips == NULL) {
1497                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1498                 goto fail;
1499         }
1500
1501         ipalloc_state->available_public_ips =
1502                 talloc_zero_array(ipalloc_state,
1503                                   struct ctdb_public_ip_list,
1504                                   ipalloc_state->num);
1505         if (ipalloc_state->available_public_ips == NULL) {
1506                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1507                 goto fail;
1508         }
1509         ipalloc_state->noiptakeover =
1510                 talloc_zero_array(ipalloc_state,
1511                                   bool,
1512                                   ipalloc_state->num);
1513         if (ipalloc_state->noiptakeover == NULL) {
1514                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1515                 goto fail;
1516         }
1517         ipalloc_state->noiphost =
1518                 talloc_zero_array(ipalloc_state,
1519                                   bool,
1520                                   ipalloc_state->num);
1521         if (ipalloc_state->noiphost == NULL) {
1522                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1523                 goto fail;
1524         }
1525
1526         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1527                 ipalloc_state->algorithm = IPALLOC_LCP2;
1528         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1529                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1530         } else {
1531                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1532         }
1533
1534         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1535
1536         return ipalloc_state;
1537 fail:
1538         talloc_free(ipalloc_state);
1539         return NULL;
1540 }
1541
/* Per-node failure accounting for one takeover run */
struct takeover_callback_data {
	/* Number of entries in fail_count */
	uint32_t num_nodes;
	/* Failures counted per node, indexed by PNN */
	unsigned int *fail_count;
};
1546
1547 static struct takeover_callback_data *
1548 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1549                             uint32_t num_nodes)
1550 {
1551         static struct takeover_callback_data *takeover_data;
1552
1553         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1554         if (takeover_data == NULL) {
1555                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1556                 return NULL;
1557         }
1558
1559         takeover_data->fail_count = talloc_zero_array(takeover_data,
1560                                                       unsigned int, num_nodes);
1561         if (takeover_data->fail_count == NULL) {
1562                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1563                 talloc_free(takeover_data);
1564                 return NULL;
1565         }
1566
1567         takeover_data->num_nodes = num_nodes;
1568
1569         return takeover_data;
1570 }
1571
1572 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1573                                        uint32_t node_pnn, int32_t res,
1574                                        TDB_DATA outdata, void *callback_data)
1575 {
1576         struct takeover_callback_data *cd =
1577                 talloc_get_type_abort(callback_data,
1578                                       struct takeover_callback_data);
1579
1580         if (node_pnn >= cd->num_nodes) {
1581                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1582                 return;
1583         }
1584
1585         if (cd->fail_count[node_pnn] == 0) {
1586                 DEBUG(DEBUG_ERR,
1587                       ("Node %u failed the takeover run\n", node_pnn));
1588         }
1589
1590         cd->fail_count[node_pnn]++;
1591 }
1592
1593 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1594                                           struct takeover_callback_data *tcd)
1595 {
1596         unsigned int max_fails = 0;
1597         uint32_t max_pnn = -1;
1598         uint32_t i;
1599
1600         for (i = 0; i < tcd->num_nodes; i++) {
1601                 if (tcd->fail_count[i] > max_fails) {
1602                         max_pnn = i;
1603                         max_fails = tcd->fail_count[i];
1604                 }
1605         }
1606
1607         if (max_fails > 0) {
1608                 int ret;
1609                 TDB_DATA data;
1610
1611                 DEBUG(DEBUG_ERR,
1612                       ("Sending banning credits to %u with fail count %u\n",
1613                        max_pnn, max_fails));
1614
1615                 data.dptr = (uint8_t *)&max_pnn;
1616                 data.dsize = sizeof(uint32_t);
1617                 ret = ctdb_client_send_message(ctdb,
1618                                                CTDB_BROADCAST_CONNECTED,
1619                                                CTDB_SRVID_BANNING,
1620                                                data);
1621                 if (ret != 0) {
1622                         DEBUG(DEBUG_ERR,
1623                               ("Failed to set banning credits for node %u\n",
1624                                max_pnn));
1625                 }
1626         }
1627 }
1628
1629 /*
1630  * Recalculate the allocation of public IPs to nodes and have the
1631  * nodes host their allocated addresses.
1632  *
1633  * - Allocate memory for IP allocation state, including per node
1634  *   arrays
1635  * - Populate IP allocation algorithm in IP allocation state
1636  * - Populate local value of tunable NoIPFailback in IP allocation
1637      state - this is really a cluster-wide configuration variable and
1638      only the value form the master node is used
1639  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1640  *   connected nodes - this is done separately so tunable values can
1641  *   be faked in unit testing
1642  * - Populate NoIPTakover tunable in IP allocation state
1643  * - Populate NoIPHost in IP allocation state, derived from node flags
1644  *   and NoIPHostOnAllDisabled tunable
1645  * - Retrieve and populate known and available IP lists in IP
1646  *   allocation state
1647  * - If no available IP addresses then early exit
1648  * - Build list of (known IPs, currently assigned node)
1649  * - Populate list of nodes to force rebalance - internal structure,
1650  *   currently no way to fetch, only used by LCP2 for nodes that have
1651  *   had new IP addresses added
1652  * - Run IP allocation algorithm
1653  * - Send RELEASE_IP to all nodes for IPs they should not host
1654  * - Send TAKE_IP to all nodes for IPs they should host
1655  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1656  */
1657 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1658                       uint32_t *force_rebalance_nodes)
1659 {
1660         int i, ret;
1661         struct ctdb_public_ip ip;
1662         uint32_t *nodes;
1663         struct public_ip_list *all_ips, *tmp_ip;
1664         TDB_DATA data;
1665         struct timeval timeout;
1666         struct client_async_data *async_data;
1667         struct ctdb_client_control_state *state;
1668         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1669         struct ipalloc_state *ipalloc_state;
1670         struct takeover_callback_data *takeover_data;
1671         bool can_host_ips;
1672
1673         /* Initialise fail callback data to be used with
1674          * takeover_run_fail_callback().  A failure in any of the
1675          * following steps will cause an early return, so this can be
1676          * reused for each of those steps without re-initialising. */
1677         takeover_data = takeover_callback_data_init(tmp_ctx,
1678                                                     nodemap->num);
1679         if (takeover_data == NULL) {
1680                 talloc_free(tmp_ctx);
1681                 return -1;
1682         }
1683
1684         /*
1685          * ip failover is completely disabled, just send out the 
1686          * ipreallocated event.
1687          */
1688         if (ctdb->tunable.disable_ip_failover != 0) {
1689                 goto ipreallocated;
1690         }
1691
1692         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1693         if (ipalloc_state == NULL) {
1694                 talloc_free(tmp_ctx);
1695                 return -1;
1696         }
1697
1698         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1699                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1700                 talloc_free(tmp_ctx);
1701                 return -1;
1702         }
1703
1704         /* Fetch known/available public IPs from each active node */
1705         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1706         if (ret != 0) {
1707                 talloc_free(tmp_ctx);
1708                 return -1;
1709         }
1710
1711         /* Short-circuit IP allocation if no node has available IPs */
1712         can_host_ips = false;
1713         for (i=0; i < ipalloc_state->num; i++) {
1714                 if (ipalloc_state->available_public_ips[i].num != 0) {
1715                         can_host_ips = true;
1716                 }
1717         }
1718         if (!can_host_ips) {
1719                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1720                 goto ipreallocated;
1721         }
1722
1723         /* since nodes only know about those public addresses that
1724            can be served by that particular node, no single node has
1725            a full list of all public addresses that exist in the cluster.
1726            Walk over all node structures and create a merged list of
1727            all public addresses that exist in the cluster.
1728
1729            keep the tree of ips around as ctdb->ip_tree
1730         */
1731         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1732         ipalloc_state->all_ips = all_ips;
1733
1734         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1735
1736         /* Do the IP reassignment calculations */
1737         ipalloc(ipalloc_state);
1738
1739         /* Now tell all nodes to release any public IPs should not
1740          * host.  This will be a NOOP on nodes that don't currently
1741          * hold the given IP.
1742          */
1743         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1744         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1745
1746         async_data->fail_callback = takeover_run_fail_callback;
1747         async_data->callback_data = takeover_data;
1748
1749         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1750
1751         /* Send a RELEASE_IP to all nodes that should not be hosting
1752          * each IP.  For each IP, all but one of these will be
1753          * redundant.  However, the redundant ones are used to tell
1754          * nodes which node should be hosting the IP so that commands
1755          * like "ctdb ip" can display a particular nodes idea of who
1756          * is hosting what. */
1757         for (i=0;i<nodemap->num;i++) {
1758                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1759                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1760                         continue;
1761                 }
1762
1763                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1764                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1765                                 /* This node should be serving this
1766                                    vnn so don't tell it to release the ip
1767                                 */
1768                                 continue;
1769                         }
1770                         ip.pnn  = tmp_ip->pnn;
1771                         ip.addr = tmp_ip->addr;
1772
1773                         timeout = TAKEOVER_TIMEOUT();
1774                         data.dsize = sizeof(ip);
1775                         data.dptr  = (uint8_t *)&ip;
1776                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1777                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1778                                                   data, async_data,
1779                                                   &timeout, NULL);
1780                         if (state == NULL) {
1781                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1782                                 talloc_free(tmp_ctx);
1783                                 return -1;
1784                         }
1785
1786                         ctdb_client_async_add(async_data, state);
1787                 }
1788         }
1789         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1790                 DEBUG(DEBUG_ERR,
1791                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1792                 goto fail;
1793         }
1794         talloc_free(async_data);
1795
1796
1797         /* For each IP, send a TAKOVER_IP to the node that should be
1798          * hosting it.  Many of these will often be redundant (since
1799          * the allocation won't have changed) but they can be useful
1800          * to recover from inconsistencies. */
1801         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1802         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1803
1804         async_data->fail_callback = takeover_run_fail_callback;
1805         async_data->callback_data = takeover_data;
1806
1807         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1808                 if (tmp_ip->pnn == -1) {
1809                         /* this IP won't be taken over */
1810                         continue;
1811                 }
1812
1813                 ip.pnn  = tmp_ip->pnn;
1814                 ip.addr = tmp_ip->addr;
1815
1816                 timeout = TAKEOVER_TIMEOUT();
1817                 data.dsize = sizeof(ip);
1818                 data.dptr  = (uint8_t *)&ip;
1819                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1820                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1821                                           data, async_data, &timeout, NULL);
1822                 if (state == NULL) {
1823                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1824                         talloc_free(tmp_ctx);
1825                         return -1;
1826                 }
1827
1828                 ctdb_client_async_add(async_data, state);
1829         }
1830         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1831                 DEBUG(DEBUG_ERR,
1832                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1833                 goto fail;
1834         }
1835
1836 ipreallocated:
1837         /*
1838          * Tell all nodes to run eventscripts to process the
1839          * "ipreallocated" event.  This can do a lot of things,
1840          * including restarting services to reconfigure them if public
1841          * IPs have moved.  Once upon a time this event only used to
1842          * update natgw.
1843          */
1844         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1845         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1846                                         nodes, 0, TAKEOVER_TIMEOUT(),
1847                                         false, tdb_null,
1848                                         NULL, takeover_run_fail_callback,
1849                                         takeover_data);
1850         if (ret != 0) {
1851                 DEBUG(DEBUG_ERR,
1852                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1853                 goto fail;
1854         }
1855
1856         talloc_free(tmp_ctx);
1857         return ret;
1858
1859 fail:
1860         takeover_run_process_failures(ctdb, takeover_data);
1861         talloc_free(tmp_ctx);
1862         return -1;
1863 }
1864
1865
1866 /*
1867   destroy a ctdb_client_ip structure
1868  */
1869 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1870 {
1871         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1872                 ctdb_addr_to_str(&ip->addr),
1873                 ntohs(ip->addr.ip.sin_port),
1874                 ip->client_id));
1875
1876         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1877         return 0;
1878 }
1879
1880 /*
1881   called by a client to inform us of a TCP connection that it is managing
1882   that should tickled with an ACK when IP takeover is done
1883  */
1884 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1885                                 TDB_DATA indata)
1886 {
1887         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1888         struct ctdb_connection *tcp_sock = NULL;
1889         struct ctdb_tcp_list *tcp;
1890         struct ctdb_connection t;
1891         int ret;
1892         TDB_DATA data;
1893         struct ctdb_client_ip *ip;
1894         struct ctdb_vnn *vnn;
1895         ctdb_sock_addr addr;
1896
1897         /* If we don't have public IPs, tickles are useless */
1898         if (ctdb->vnn == NULL) {
1899                 return 0;
1900         }
1901
1902         tcp_sock = (struct ctdb_connection *)indata.dptr;
1903
1904         addr = tcp_sock->src;
1905         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1906         addr = tcp_sock->dst;
1907         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1908
1909         ZERO_STRUCT(addr);
1910         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1911         vnn = find_public_ip_vnn(ctdb, &addr);
1912         if (vnn == NULL) {
1913                 switch (addr.sa.sa_family) {
1914                 case AF_INET:
1915                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1916                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1917                                         ctdb_addr_to_str(&addr)));
1918                         }
1919                         break;
1920                 case AF_INET6:
1921                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1922                                 ctdb_addr_to_str(&addr)));
1923                         break;
1924                 default:
1925                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1926                 }
1927
1928                 return 0;
1929         }
1930
1931         if (vnn->pnn != ctdb->pnn) {
1932                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1933                         ctdb_addr_to_str(&addr),
1934                         client_id, client->pid));
1935                 /* failing this call will tell smbd to die */
1936                 return -1;
1937         }
1938
1939         ip = talloc(client, struct ctdb_client_ip);
1940         CTDB_NO_MEMORY(ctdb, ip);
1941
1942         ip->ctdb      = ctdb;
1943         ip->addr      = addr;
1944         ip->client_id = client_id;
1945         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1946         DLIST_ADD(ctdb->client_ip_list, ip);
1947
1948         tcp = talloc(client, struct ctdb_tcp_list);
1949         CTDB_NO_MEMORY(ctdb, tcp);
1950
1951         tcp->connection.src = tcp_sock->src;
1952         tcp->connection.dst = tcp_sock->dst;
1953
1954         DLIST_ADD(client->tcp_list, tcp);
1955
1956         t.src = tcp_sock->src;
1957         t.dst = tcp_sock->dst;
1958
1959         data.dptr = (uint8_t *)&t;
1960         data.dsize = sizeof(t);
1961
1962         switch (addr.sa.sa_family) {
1963         case AF_INET:
1964                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1965                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1966                         ctdb_addr_to_str(&tcp_sock->src),
1967                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1968                 break;
1969         case AF_INET6:
1970                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1971                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1972                         ctdb_addr_to_str(&tcp_sock->src),
1973                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1974                 break;
1975         default:
1976                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1977         }
1978
1979
1980         /* tell all nodes about this tcp connection */
1981         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1982                                        CTDB_CONTROL_TCP_ADD,
1983                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1984         if (ret != 0) {
1985                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1986                 return -1;
1987         }
1988
1989         return 0;
1990 }
1991
1992 /*
1993   find a tcp address on a list
1994  */
1995 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1996                                            struct ctdb_connection *tcp)
1997 {
1998         int i;
1999
2000         if (array == NULL) {
2001                 return NULL;
2002         }
2003
2004         for (i=0;i<array->num;i++) {
2005                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2006                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2007                         return &array->connections[i];
2008                 }
2009         }
2010         return NULL;
2011 }
2012
2013
2014
2015 /*
2016   called by a daemon to inform us of a TCP connection that one of its
2017   clients managing that should tickled with an ACK when IP takeover is
2018   done
2019  */
2020 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2021 {
2022         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2023         struct ctdb_tcp_array *tcparray;
2024         struct ctdb_connection tcp;
2025         struct ctdb_vnn *vnn;
2026
2027         /* If we don't have public IPs, tickles are useless */
2028         if (ctdb->vnn == NULL) {
2029                 return 0;
2030         }
2031
2032         vnn = find_public_ip_vnn(ctdb, &p->dst);
2033         if (vnn == NULL) {
2034                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2035                         ctdb_addr_to_str(&p->dst)));
2036
2037                 return -1;
2038         }
2039
2040
2041         tcparray = vnn->tcp_array;
2042
2043         /* If this is the first tickle */
2044         if (tcparray == NULL) {
2045                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2046                 CTDB_NO_MEMORY(ctdb, tcparray);
2047                 vnn->tcp_array = tcparray;
2048
2049                 tcparray->num = 0;
2050                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2051                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2052
2053                 tcparray->connections[tcparray->num].src = p->src;
2054                 tcparray->connections[tcparray->num].dst = p->dst;
2055                 tcparray->num++;
2056
2057                 if (tcp_update_needed) {
2058                         vnn->tcp_update_needed = true;
2059                 }
2060                 return 0;
2061         }
2062
2063
2064         /* Do we already have this tickle ?*/
2065         tcp.src = p->src;
2066         tcp.dst = p->dst;
2067         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2068                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2069                         ctdb_addr_to_str(&tcp.dst),
2070                         ntohs(tcp.dst.ip.sin_port),
2071                         vnn->pnn));
2072                 return 0;
2073         }
2074
2075         /* A new tickle, we must add it to the array */
2076         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2077                                         struct ctdb_connection,
2078                                         tcparray->num+1);
2079         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2080
2081         tcparray->connections[tcparray->num].src = p->src;
2082         tcparray->connections[tcparray->num].dst = p->dst;
2083         tcparray->num++;
2084
2085         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2086                 ctdb_addr_to_str(&tcp.dst),
2087                 ntohs(tcp.dst.ip.sin_port),
2088                 vnn->pnn));
2089
2090         if (tcp_update_needed) {
2091                 vnn->tcp_update_needed = true;
2092         }
2093
2094         return 0;
2095 }
2096
2097
2098 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2099 {
2100         struct ctdb_connection *tcpp;
2101
2102         if (vnn == NULL) {
2103                 return;
2104         }
2105
2106         /* if the array is empty we cant remove it
2107            and we don't need to do anything
2108          */
2109         if (vnn->tcp_array == NULL) {
2110                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2111                         ctdb_addr_to_str(&conn->dst),
2112                         ntohs(conn->dst.ip.sin_port)));
2113                 return;
2114         }
2115
2116
2117         /* See if we know this connection
2118            if we don't know this connection  then we dont need to do anything
2119          */
2120         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2121         if (tcpp == NULL) {
2122                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2123                         ctdb_addr_to_str(&conn->dst),
2124                         ntohs(conn->dst.ip.sin_port)));
2125                 return;
2126         }
2127
2128
2129         /* We need to remove this entry from the array.
2130            Instead of allocating a new array and copying data to it
2131            we cheat and just copy the last entry in the existing array
2132            to the entry that is to be removed and just shring the 
2133            ->num field
2134          */
2135         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2136         vnn->tcp_array->num--;
2137
2138         /* If we deleted the last entry we also need to remove the entire array
2139          */
2140         if (vnn->tcp_array->num == 0) {
2141                 talloc_free(vnn->tcp_array);
2142                 vnn->tcp_array = NULL;
2143         }               
2144
2145         vnn->tcp_update_needed = true;
2146
2147         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2148                 ctdb_addr_to_str(&conn->src),
2149                 ntohs(conn->src.ip.sin_port)));
2150 }
2151
2152
2153 /*
2154   called by a daemon to inform us of a TCP connection that one of its
2155   clients used are no longer needed in the tickle database
2156  */
2157 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2158 {
2159         struct ctdb_vnn *vnn;
2160         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2161
2162         /* If we don't have public IPs, tickles are useless */
2163         if (ctdb->vnn == NULL) {
2164                 return 0;
2165         }
2166
2167         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2168         if (vnn == NULL) {
2169                 DEBUG(DEBUG_ERR,
2170                       (__location__ " unable to find public address %s\n",
2171                        ctdb_addr_to_str(&conn->dst)));
2172                 return 0;
2173         }
2174
2175         ctdb_remove_connection(vnn, conn);
2176
2177         return 0;
2178 }
2179
2180
2181 /*
2182   Called when another daemon starts - causes all tickles for all
2183   public addresses we are serving to be sent to the new node on the
2184   next check.  This actually causes the next scheduled call to
2185   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2186   doesn't require careful error handling.
2187  */
2188 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2189 {
2190         struct ctdb_vnn *vnn;
2191
2192         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2193                            (unsigned long) pnn));
2194
2195         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2196                 vnn->tcp_update_needed = true;
2197         }
2198
2199         return 0;
2200 }
2201
2202
2203 /*
2204   called when a client structure goes away - hook to remove
2205   elements from the tcp_list in all daemons
2206  */
2207 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2208 {
2209         while (client->tcp_list) {
2210                 struct ctdb_vnn *vnn;
2211                 struct ctdb_tcp_list *tcp = client->tcp_list;
2212                 struct ctdb_connection *conn = &tcp->connection;
2213
2214                 DLIST_REMOVE(client->tcp_list, tcp);
2215
2216                 vnn = find_public_ip_vnn(client->ctdb,
2217                                          &conn->dst);
2218                 if (vnn == NULL) {
2219                         DEBUG(DEBUG_ERR,
2220                               (__location__ " unable to find public address %s\n",
2221                                ctdb_addr_to_str(&conn->dst)));
2222                         continue;
2223                 }
2224
2225                 /* If the IP address is hosted on this node then
2226                  * remove the connection. */
2227                 if (vnn->pnn == client->ctdb->pnn) {
2228                         ctdb_remove_connection(vnn, conn);
2229                 }
2230
2231                 /* Otherwise this function has been called because the
2232                  * server IP address has been released to another node
2233                  * and the client has exited.  This means that we
2234                  * should not delete the connection information.  The
2235                  * takeover node processes connections too. */
2236         }
2237 }
2238
2239
2240 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2241 {
2242         struct ctdb_vnn *vnn;
2243         int count = 0;
2244         TDB_DATA data;
2245
2246         if (ctdb->tunable.disable_ip_failover == 1) {
2247                 return;
2248         }
2249
2250         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2251                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2252                         ctdb_vnn_unassign_iface(ctdb, vnn);
2253                         continue;
2254                 }
2255                 if (!vnn->iface) {
2256                         continue;
2257                 }
2258
2259                 /* Don't allow multiple releases at once.  Some code,
2260                  * particularly ctdb_tickle_sentenced_connections() is
2261                  * not re-entrant */
2262                 if (vnn->update_in_flight) {
2263                         DEBUG(DEBUG_WARNING,
2264                               (__location__
2265                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2266                                     ctdb_addr_to_str(&vnn->public_address),
2267                                     vnn->public_netmask_bits,
2268                                     ctdb_vnn_iface_string(vnn)));
2269                         continue;
2270                 }
2271                 vnn->update_in_flight = true;
2272
2273                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2274                                     ctdb_addr_to_str(&vnn->public_address),
2275                                     vnn->public_netmask_bits,
2276                                     ctdb_vnn_iface_string(vnn)));
2277
2278                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2279                                   ctdb_vnn_iface_string(vnn),
2280                                   ctdb_addr_to_str(&vnn->public_address),
2281                                   vnn->public_netmask_bits);
2282
2283                 data.dptr = (uint8_t *)talloc_strdup(
2284                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2285                 if (data.dptr != NULL) {
2286                         data.dsize = strlen((char *)data.dptr) + 1;
2287                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2288                                                  CTDB_SRVID_RELEASE_IP, data);
2289                         talloc_free(data.dptr);
2290                 }
2291
2292                 ctdb_vnn_unassign_iface(ctdb, vnn);
2293                 vnn->update_in_flight = false;
2294                 count++;
2295         }
2296
2297         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2298 }
2299
2300
2301 /*
2302   get list of public IPs
2303  */
2304 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2305                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2306 {
2307         int i, num, len;
2308         struct ctdb_public_ip_list_old *ips;
2309         struct ctdb_vnn *vnn;
2310         bool only_available = false;
2311
2312         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2313                 only_available = true;
2314         }
2315
2316         /* count how many public ip structures we have */
2317         num = 0;
2318         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2319                 num++;
2320         }
2321
2322         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2323                 num*sizeof(struct ctdb_public_ip);
2324         ips = talloc_zero_size(outdata, len);
2325         CTDB_NO_MEMORY(ctdb, ips);
2326
2327         i = 0;
2328         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2329                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2330                         continue;
2331                 }
2332                 ips->ips[i].pnn  = vnn->pnn;
2333                 ips->ips[i].addr = vnn->public_address;
2334                 i++;
2335         }
2336         ips->num = i;
2337         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2338                 i*sizeof(struct ctdb_public_ip);
2339
2340         outdata->dsize = len;
2341         outdata->dptr  = (uint8_t *)ips;
2342
2343         return 0;
2344 }
2345
2346
2347 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2348                                         struct ctdb_req_control_old *c,
2349                                         TDB_DATA indata,
2350                                         TDB_DATA *outdata)
2351 {
2352         int i, num, len;
2353         ctdb_sock_addr *addr;
2354         struct ctdb_public_ip_info_old *info;
2355         struct ctdb_vnn *vnn;
2356
2357         addr = (ctdb_sock_addr *)indata.dptr;
2358
2359         vnn = find_public_ip_vnn(ctdb, addr);
2360         if (vnn == NULL) {
2361                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2362                                  "'%s'not a public address\n",
2363                                  ctdb_addr_to_str(addr)));
2364                 return -1;
2365         }
2366
2367         /* count how many public ip structures we have */
2368         num = 0;
2369         for (;vnn->ifaces[num];) {
2370                 num++;
2371         }
2372
2373         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2374                 num*sizeof(struct ctdb_iface);
2375         info = talloc_zero_size(outdata, len);
2376         CTDB_NO_MEMORY(ctdb, info);
2377
2378         info->ip.addr = vnn->public_address;
2379         info->ip.pnn = vnn->pnn;
2380         info->active_idx = 0xFFFFFFFF;
2381
2382         for (i=0; vnn->ifaces[i]; i++) {
2383                 struct ctdb_interface *cur;
2384
2385                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2386                 if (cur == NULL) {
2387                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2388                                            vnn->ifaces[i]));
2389                         return -1;
2390                 }
2391                 if (vnn->iface == cur) {
2392                         info->active_idx = i;
2393                 }
2394                 strncpy(info->ifaces[i].name, cur->name,
2395                         sizeof(info->ifaces[i].name));
2396                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2397                 info->ifaces[i].link_state = cur->link_up;
2398                 info->ifaces[i].references = cur->references;
2399         }
2400         info->num = i;
2401         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2402                 i*sizeof(struct ctdb_iface);
2403
2404         outdata->dsize = len;
2405         outdata->dptr  = (uint8_t *)info;
2406
2407         return 0;
2408 }
2409
2410 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2411                                 struct ctdb_req_control_old *c,
2412                                 TDB_DATA *outdata)
2413 {
2414         int i, num, len;
2415         struct ctdb_iface_list_old *ifaces;
2416         struct ctdb_interface *cur;
2417
2418         /* count how many public ip structures we have */
2419         num = 0;
2420         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2421                 num++;
2422         }
2423
2424         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2425                 num*sizeof(struct ctdb_iface);
2426         ifaces = talloc_zero_size(outdata, len);
2427         CTDB_NO_MEMORY(ctdb, ifaces);
2428
2429         i = 0;
2430         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2431                 strncpy(ifaces->ifaces[i].name, cur->name,
2432                         sizeof(ifaces->ifaces[i].name));
2433                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2434                 ifaces->ifaces[i].link_state = cur->link_up;
2435                 ifaces->ifaces[i].references = cur->references;
2436                 i++;
2437         }
2438         ifaces->num = i;
2439         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2440                 i*sizeof(struct ctdb_iface);
2441
2442         outdata->dsize = len;
2443         outdata->dptr  = (uint8_t *)ifaces;
2444
2445         return 0;
2446 }
2447
2448 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2449                                     struct ctdb_req_control_old *c,
2450                                     TDB_DATA indata)
2451 {
2452         struct ctdb_iface *info;
2453         struct ctdb_interface *iface;
2454         bool link_up = false;
2455
2456         info = (struct ctdb_iface *)indata.dptr;
2457
2458         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2459                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2460                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2461                                   len, len, info->name));
2462                 return -1;
2463         }
2464
2465         switch (info->link_state) {
2466         case 0:
2467                 link_up = false;
2468                 break;
2469         case 1:
2470                 link_up = true;
2471                 break;
2472         default:
2473                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2474                                   (unsigned int)info->link_state));
2475                 return -1;
2476         }
2477
2478         if (info->references != 0) {
2479                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2480                                   (unsigned int)info->references));
2481                 return -1;
2482         }
2483
2484         iface = ctdb_find_iface(ctdb, info->name);
2485         if (iface == NULL) {
2486                 return -1;
2487         }
2488
2489         if (link_up == iface->link_up) {
2490                 return 0;
2491         }
2492
2493         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2494               ("iface[%s] has changed it's link status %s => %s\n",
2495                iface->name,
2496                iface->link_up?"up":"down",
2497                link_up?"up":"down"));
2498
2499         iface->link_up = link_up;
2500         return 0;
2501 }
2502
2503
2504 /*
2505   called by a daemon to inform us of the entire list of TCP tickles for
2506   a particular public address.
2507   this control should only be sent by the node that is currently serving
2508   that public address.
2509  */
2510 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2511 {
2512         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2513         struct ctdb_tcp_array *tcparray;
2514         struct ctdb_vnn *vnn;
2515
2516         /* We must at least have tickles.num or else we cant verify the size
2517            of the received data blob
2518          */
2519         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2520                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2521                 return -1;
2522         }
2523
2524         /* verify that the size of data matches what we expect */
2525         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2526                          + sizeof(struct ctdb_connection) * list->num) {
2527                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2528                 return -1;
2529         }
2530
2531         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2532                            ctdb_addr_to_str(&list->addr)));
2533
2534         vnn = find_public_ip_vnn(ctdb, &list->addr);
2535         if (vnn == NULL) {
2536                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2537                         ctdb_addr_to_str(&list->addr)));
2538
2539                 return 1;
2540         }
2541
2542         if (vnn->pnn == ctdb->pnn) {
2543                 DEBUG(DEBUG_INFO,
2544                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2545                        ctdb_addr_to_str(&list->addr)));
2546                 return 0;
2547         }
2548
2549         /* remove any old ticklelist we might have */
2550         talloc_free(vnn->tcp_array);
2551         vnn->tcp_array = NULL;
2552
2553         tcparray = talloc(vnn, struct ctdb_tcp_array);
2554         CTDB_NO_MEMORY(ctdb, tcparray);
2555
2556         tcparray->num = list->num;
2557
2558         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2559         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2560
2561         memcpy(tcparray->connections, &list->connections[0],
2562                sizeof(struct ctdb_connection)*tcparray->num);
2563
2564         /* We now have a new fresh tickle list array for this vnn */
2565         vnn->tcp_array = tcparray;
2566
2567         return 0;
2568 }
2569
2570 /*
2571   called to return the full list of tickles for the puclic address associated 
2572   with the provided vnn
2573  */
2574 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2575 {
2576         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2577         struct ctdb_tickle_list_old *list;
2578         struct ctdb_tcp_array *tcparray;
2579         int num, i;
2580         struct ctdb_vnn *vnn;
2581         unsigned port;
2582
2583         vnn = find_public_ip_vnn(ctdb, addr);
2584         if (vnn == NULL) {
2585                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2586                         ctdb_addr_to_str(addr)));
2587
2588                 return 1;
2589         }
2590
2591         port = ctdb_addr_to_port(addr);
2592
2593         tcparray = vnn->tcp_array;
2594         num = 0;
2595         if (tcparray != NULL) {
2596                 if (port == 0) {
2597                         /* All connections */
2598                         num = tcparray->num;
2599                 } else {
2600                         /* Count connections for port */
2601                         for (i = 0; i < tcparray->num; i++) {
2602                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2603                                         num++;
2604                                 }
2605                         }
2606                 }
2607         }
2608
2609         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2610                         + sizeof(struct ctdb_connection) * num;
2611
2612         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2613         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2614         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2615
2616         list->addr = *addr;
2617         list->num = num;
2618
2619         if (num == 0) {
2620                 return 0;
2621         }
2622
2623         num = 0;
2624         for (i = 0; i < tcparray->num; i++) {
2625                 if (port == 0 || \
2626                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2627                         list->connections[num] = tcparray->connections[i];
2628                         num++;
2629                 }
2630         }
2631
2632         return 0;
2633 }
2634
2635
2636 /*
2637   set the list of all tcp tickles for a public address
2638  */
2639 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2640                                             ctdb_sock_addr *addr,
2641                                             struct ctdb_tcp_array *tcparray)
2642 {
2643         int ret, num;
2644         TDB_DATA data;
2645         struct ctdb_tickle_list_old *list;
2646
2647         if (tcparray) {
2648                 num = tcparray->num;
2649         } else {
2650                 num = 0;
2651         }
2652
2653         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2654                         sizeof(struct ctdb_connection) * num;
2655         data.dptr = talloc_size(ctdb, data.dsize);
2656         CTDB_NO_MEMORY(ctdb, data.dptr);
2657
2658         list = (struct ctdb_tickle_list_old *)data.dptr;
2659         list->addr = *addr;
2660         list->num = num;
2661         if (tcparray) {
2662                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2663         }
2664
2665         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2666                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2667                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2668         if (ret != 0) {
2669                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2670                 return -1;
2671         }
2672
2673         talloc_free(data.dptr);
2674
2675         return ret;
2676 }
2677
2678
2679 /*
2680   perform tickle updates if required
2681  */
2682 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2683                                     struct tevent_timer *te,
2684                                     struct timeval t, void *private_data)
2685 {
2686         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2687         int ret;
2688         struct ctdb_vnn *vnn;
2689
2690         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2691                 /* we only send out updates for public addresses that 
2692                    we have taken over
2693                  */
2694                 if (ctdb->pnn != vnn->pnn) {
2695                         continue;
2696                 }
2697                 /* We only send out the updates if we need to */
2698                 if (!vnn->tcp_update_needed) {
2699                         continue;
2700                 }
2701                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2702                                                        &vnn->public_address,
2703                                                        vnn->tcp_array);
2704                 if (ret != 0) {
2705                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2706                                 ctdb_addr_to_str(&vnn->public_address)));
2707                 } else {
2708                         DEBUG(DEBUG_INFO,
2709                               ("Sent tickle update for public address %s\n",
2710                                ctdb_addr_to_str(&vnn->public_address)));
2711                         vnn->tcp_update_needed = false;
2712                 }
2713         }
2714
2715         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2716                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2717                          ctdb_update_tcp_tickles, ctdb);
2718 }
2719
2720 /*
2721   start periodic update of tcp tickles
2722  */
2723 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2724 {
2725         ctdb->tickle_update_context = talloc_new(ctdb);
2726
2727         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2728                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2729                          ctdb_update_tcp_tickles, ctdb);
2730 }
2731
2732
2733
2734
2735 struct control_gratious_arp {
2736         struct ctdb_context *ctdb;
2737         ctdb_sock_addr addr;
2738         const char *iface;
2739         int count;
2740 };
2741
2742 /*
2743   send a control_gratuitous arp
2744  */
2745 static void send_gratious_arp(struct tevent_context *ev,
2746                               struct tevent_timer *te,
2747                               struct timeval t, void *private_data)
2748 {
2749         int ret;
2750         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2751                                                         struct control_gratious_arp);
2752
2753         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2754         if (ret != 0) {
2755                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2756                                  arp->iface, strerror(errno)));
2757         }
2758
2759
2760         arp->count++;
2761         if (arp->count == CTDB_ARP_REPEAT) {
2762                 talloc_free(arp);
2763                 return;
2764         }
2765
2766         tevent_add_timer(arp->ctdb->ev, arp,
2767                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2768                          send_gratious_arp, arp);
2769 }
2770
2771
2772 /*
2773   send a gratious arp 
2774  */
2775 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2776 {
2777         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2778         struct control_gratious_arp *arp;
2779
2780         /* verify the size of indata */
2781         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2782                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2783                                  (unsigned)indata.dsize, 
2784                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2785                 return -1;
2786         }
2787         if (indata.dsize != 
2788                 ( offsetof(struct ctdb_addr_info_old, iface)
2789                 + gratious_arp->len ) ){
2790
2791                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2792                         "but should be %u bytes\n", 
2793                          (unsigned)indata.dsize, 
2794                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2795                 return -1;
2796         }
2797
2798
2799         arp = talloc(ctdb, struct control_gratious_arp);
2800         CTDB_NO_MEMORY(ctdb, arp);
2801
2802         arp->ctdb  = ctdb;
2803         arp->addr   = gratious_arp->addr;
2804         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2805         CTDB_NO_MEMORY(ctdb, arp->iface);
2806         arp->count = 0;
2807
2808         tevent_add_timer(arp->ctdb->ev, arp,
2809                          timeval_zero(), send_gratious_arp, arp);
2810
2811         return 0;
2812 }
2813
2814 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2815 {
2816         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2817         int ret;
2818
2819         /* verify the size of indata */
2820         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2821                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2822                 return -1;
2823         }
2824         if (indata.dsize != 
2825                 ( offsetof(struct ctdb_addr_info_old, iface)
2826                 + pub->len ) ){
2827
2828                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2829                         "but should be %u bytes\n", 
2830                          (unsigned)indata.dsize, 
2831                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2832                 return -1;
2833         }
2834
2835         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2836
2837         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2838
2839         if (ret != 0) {
2840                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2841                 return -1;
2842         }
2843
2844         return 0;
2845 }
2846
2847 struct delete_ip_callback_state {
2848         struct ctdb_req_control_old *c;
2849 };
2850
2851 /*
2852   called when releaseip event finishes for del_public_address
2853  */
2854 static void delete_ip_callback(struct ctdb_context *ctdb,
2855                                int32_t status, TDB_DATA data,
2856                                const char *errormsg,
2857                                void *private_data)
2858 {
2859         struct delete_ip_callback_state *state =
2860                 talloc_get_type(private_data, struct delete_ip_callback_state);
2861
2862         /* If release failed then fail. */
2863         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2864         talloc_free(private_data);
2865 }
2866
2867 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2868                                         struct ctdb_req_control_old *c,
2869                                         TDB_DATA indata, bool *async_reply)
2870 {
2871         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2872         struct ctdb_vnn *vnn;
2873
2874         /* verify the size of indata */
2875         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2876                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2877                 return -1;
2878         }
2879         if (indata.dsize != 
2880                 ( offsetof(struct ctdb_addr_info_old, iface)
2881                 + pub->len ) ){
2882
2883                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2884                         "but should be %u bytes\n", 
2885                          (unsigned)indata.dsize, 
2886                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2887                 return -1;
2888         }
2889
2890         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2891
2892         /* walk over all public addresses until we find a match */
2893         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2894                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2895                         if (vnn->pnn == ctdb->pnn) {
2896                                 struct delete_ip_callback_state *state;
2897                                 struct ctdb_public_ip *ip;
2898                                 TDB_DATA data;
2899                                 int ret;
2900
2901                                 vnn->delete_pending = true;
2902
2903                                 state = talloc(ctdb,
2904                                                struct delete_ip_callback_state);
2905                                 CTDB_NO_MEMORY(ctdb, state);
2906                                 state->c = c;
2907
2908                                 ip = talloc(state, struct ctdb_public_ip);
2909                                 if (ip == NULL) {
2910                                         DEBUG(DEBUG_ERR,
2911                                               (__location__ " Out of memory\n"));
2912                                         talloc_free(state);
2913                                         return -1;
2914                                 }
2915                                 ip->pnn = -1;
2916                                 ip->addr = pub->addr;
2917
2918                                 data.dsize = sizeof(struct ctdb_public_ip);
2919                                 data.dptr = (unsigned char *)ip;
2920
2921                                 ret = ctdb_daemon_send_control(ctdb,
2922                                                                ctdb_get_pnn(ctdb),
2923                                                                0,
2924                                                                CTDB_CONTROL_RELEASE_IP,
2925                                                                0, 0,
2926                                                                data,
2927                                                                delete_ip_callback,
2928                                                                state);
2929                                 if (ret == -1) {
2930                                         DEBUG(DEBUG_ERR,
2931                                               (__location__ "Unable to send "
2932                                                "CTDB_CONTROL_RELEASE_IP\n"));
2933                                         talloc_free(state);
2934                                         return -1;
2935                                 }
2936
2937                                 state->c = talloc_steal(state, c);
2938                                 *async_reply = true;
2939                         } else {
2940                                 /* This IP is not hosted on the
2941                                  * current node so just delete it
2942                                  * now. */
2943                                 do_delete_ip(ctdb, vnn);
2944                         }
2945
2946                         return 0;
2947                 }
2948         }
2949
2950         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2951                          ctdb_addr_to_str(&pub->addr)));
2952         return -1;
2953 }
2954
2955
2956 struct ipreallocated_callback_state {
2957         struct ctdb_req_control_old *c;
2958 };
2959
2960 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2961                                         int status, void *p)
2962 {
2963         struct ipreallocated_callback_state *state =
2964                 talloc_get_type(p, struct ipreallocated_callback_state);
2965
2966         if (status != 0) {
2967                 DEBUG(DEBUG_ERR,
2968                       (" \"ipreallocated\" event script failed (status %d)\n",
2969                        status));
2970                 if (status == -ETIME) {
2971                         ctdb_ban_self(ctdb);
2972                 }
2973         }
2974
2975         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2976         talloc_free(state);
2977 }
2978
2979 /* A control to run the ipreallocated event */
2980 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2981                                    struct ctdb_req_control_old *c,
2982                                    bool *async_reply)
2983 {
2984         int ret;
2985         struct ipreallocated_callback_state *state;
2986
2987         state = talloc(ctdb, struct ipreallocated_callback_state);
2988         CTDB_NO_MEMORY(ctdb, state);
2989
2990         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2991
2992         ret = ctdb_event_script_callback(ctdb, state,
2993                                          ctdb_ipreallocated_callback, state,
2994                                          CTDB_EVENT_IPREALLOCATED,
2995                                          "%s", "");
2996
2997         if (ret != 0) {
2998                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2999                 talloc_free(state);
3000                 return -1;
3001         }
3002
3003         /* tell the control that we will be reply asynchronously */
3004         state->c    = talloc_steal(state, c);
3005         *async_reply = true;
3006
3007         return 0;
3008 }
3009
3010
3011 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3012 {
3013         struct public_ip_list *tmp_ip;
3014
3015         /* IP tree is never built if DisableIPFailover is set */
3016         if (ctdb->tunable.disable_ip_failover != 0) {
3017                 return 0;
3018         }
3019
3020         if (ctdb->ip_tree == NULL) {
3021                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3022                 return -1;
3023         }
3024
3025         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3026         if (tmp_ip == NULL) {
3027                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3028                 return -1;
3029         }
3030
3031         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3032         tmp_ip->pnn = ip->pnn;
3033
3034         return 0;
3035 }
3036
3037 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3038 {
3039         TALLOC_FREE(ctdb->ip_tree);
3040 }
3041
3042 struct ctdb_reloadips_handle {
3043         struct ctdb_context *ctdb;
3044         struct ctdb_req_control_old *c;
3045         int status;
3046         int fd[2];
3047         pid_t child;
3048         struct tevent_fd *fde;
3049 };
3050
3051 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3052 {
3053         if (h == h->ctdb->reload_ips) {
3054                 h->ctdb->reload_ips = NULL;
3055         }
3056         if (h->c != NULL) {
3057                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3058                 h->c = NULL;
3059         }
3060         ctdb_kill(h->ctdb, h->child, SIGKILL);
3061         return 0;
3062 }
3063
3064 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3065                                          struct tevent_timer *te,
3066                                          struct timeval t, void *private_data)
3067 {
3068         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3069
3070         talloc_free(h);
3071 }
3072
3073 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3074                                          struct tevent_fd *fde,
3075                                          uint16_t flags, void *private_data)
3076 {
3077         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3078
3079         char res;
3080         int ret;
3081
3082         ret = sys_read(h->fd[0], &res, 1);
3083         if (ret < 1 || res != 0) {
3084                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3085                 res = 1;
3086         }
3087         h->status = res;
3088
3089         talloc_free(h);
3090 }
3091
3092 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3093 {
3094         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3095         struct ctdb_public_ip_list_old *ips;
3096         struct ctdb_vnn *vnn;
3097         struct client_async_data *async_data;
3098         struct timeval timeout;
3099         TDB_DATA data;
3100         struct ctdb_client_control_state *state;
3101         bool first_add;
3102         int i, ret;
3103
3104         CTDB_NO_MEMORY(ctdb, mem_ctx);
3105
3106         /* Read IPs from local node */
3107         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3108                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3109         if (ret != 0) {
3110                 DEBUG(DEBUG_ERR,
3111                       ("Unable to fetch public IPs from local node\n"));
3112                 talloc_free(mem_ctx);
3113                 return -1;
3114         }
3115
3116         /* Read IPs file - this is safe since this is a child process */
3117         ctdb->vnn = NULL;
3118         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3119                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3120                 talloc_free(mem_ctx);
3121                 return -1;
3122         }
3123
3124         async_data = talloc_zero(mem_ctx, struct client_async_data);
3125         CTDB_NO_MEMORY(ctdb, async_data);
3126
3127         /* Compare IPs between node and file for IPs to be deleted */
3128         for (i = 0; i < ips->num; i++) {
3129                 /* */
3130                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3131                         if (ctdb_same_ip(&vnn->public_address,
3132                                          &ips->ips[i].addr)) {
3133                                 /* IP is still in file */
3134                                 break;
3135                         }
3136                 }
3137
3138                 if (vnn == NULL) {
3139                         /* Delete IP ips->ips[i] */
3140                         struct ctdb_addr_info_old *pub;
3141
3142                         DEBUG(DEBUG_NOTICE,
3143                               ("IP %s no longer configured, deleting it\n",
3144                                ctdb_addr_to_str(&ips->ips[i].addr)));
3145
3146                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3147                         CTDB_NO_MEMORY(ctdb, pub);
3148
3149                         pub->addr  = ips->ips[i].addr;
3150                         pub->mask  = 0;
3151                         pub->len   = 0;
3152
3153                         timeout = TAKEOVER_TIMEOUT();
3154
3155                         data.dsize = offsetof(struct ctdb_addr_info_old,
3156                                               iface) + pub->len;
3157                         data.dptr = (uint8_t *)pub;
3158
3159                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3160                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3161                                                   0, data, async_data,
3162                                                   &timeout, NULL);
3163                         if (state == NULL) {
3164                                 DEBUG(DEBUG_ERR,
3165                                       (__location__
3166                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3167                                 goto failed;
3168                         }
3169
3170                         ctdb_client_async_add(async_data, state);
3171                 }
3172         }
3173
3174         /* Compare IPs between node and file for IPs to be added */
3175         first_add = true;
3176         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3177                 for (i = 0; i < ips->num; i++) {
3178                         if (ctdb_same_ip(&vnn->public_address,
3179                                          &ips->ips[i].addr)) {
3180                                 /* IP already on node */
3181                                 break;
3182                         }
3183                 }
3184                 if (i == ips->num) {
3185                         /* Add IP ips->ips[i] */
3186                         struct ctdb_addr_info_old *pub;
3187                         const char *ifaces = NULL;
3188                         uint32_t len;
3189                         int iface = 0;
3190
3191                         DEBUG(DEBUG_NOTICE,
3192                               ("New IP %s configured, adding it\n",
3193                                ctdb_addr_to_str(&vnn->public_address)));
3194                         if (first_add) {
3195                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3196
3197                                 data.dsize = sizeof(pnn);
3198                                 data.dptr  = (uint8_t *)&pnn;
3199
3200                                 ret = ctdb_client_send_message(
3201                                         ctdb,
3202                                         CTDB_BROADCAST_CONNECTED,
3203                                         CTDB_SRVID_REBALANCE_NODE,
3204                                         data);
3205                                 if (ret != 0) {
3206                                         DEBUG(DEBUG_WARNING,
3207                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3208                                 }
3209
3210                                 first_add = false;
3211                         }
3212
3213                         ifaces = vnn->ifaces[0];
3214                         iface = 1;
3215                         while (vnn->ifaces[iface] != NULL) {
3216                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3217                                                          vnn->ifaces[iface]);
3218                                 iface++;
3219                         }
3220
3221                         len   = strlen(ifaces) + 1;
3222                         pub = talloc_zero_size(mem_ctx,
3223                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3224                         CTDB_NO_MEMORY(ctdb, pub);
3225
3226                         pub->addr  = vnn->public_address;
3227                         pub->mask  = vnn->public_netmask_bits;
3228                         pub->len   = len;
3229                         memcpy(&pub->iface[0], ifaces, pub->len);
3230
3231                         timeout = TAKEOVER_TIMEOUT();
3232
3233                         data.dsize = offsetof(struct ctdb_addr_info_old,
3234                                               iface) + pub->len;
3235                         data.dptr = (uint8_t *)pub;
3236
3237                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3238                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3239                                                   0, data, async_data,
3240                                                   &timeout, NULL);
3241                         if (state == NULL) {
3242                                 DEBUG(DEBUG_ERR,
3243                                       (__location__
3244                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3245                                 goto failed;
3246                         }
3247
3248                         ctdb_client_async_add(async_data, state);
3249                 }
3250         }
3251
3252         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3253                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3254                 goto failed;
3255         }
3256
3257         talloc_free(mem_ctx);
3258         return 0;
3259
3260 failed:
3261         talloc_free(mem_ctx);
3262         return -1;
3263 }
3264
3265 /* This control is sent to force the node to re-read the public addresses file
3266    and drop any addresses we should nnot longer host, and add new addresses
3267    that we are now able to host
3268 */
3269 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3270 {
3271         struct ctdb_reloadips_handle *h;
3272         pid_t parent = getpid();
3273
3274         if (ctdb->reload_ips != NULL) {
3275                 talloc_free(ctdb->reload_ips);
3276                 ctdb->reload_ips = NULL;
3277         }
3278
3279         h = talloc(ctdb, struct ctdb_reloadips_handle);
3280         CTDB_NO_MEMORY(ctdb, h);
3281         h->ctdb     = ctdb;
3282         h->c        = NULL;
3283         h->status   = -1;
3284         
3285         if (pipe(h->fd) == -1) {
3286                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3287                 talloc_free(h);
3288                 return -1;
3289         }
3290
3291         h->child = ctdb_fork(ctdb);
3292         if (h->child == (pid_t)-1) {
3293                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3294                 close(h->fd[0]);
3295                 close(h->fd[1]);
3296                 talloc_free(h);
3297                 return -1;
3298         }
3299
3300         /* child process */
3301         if (h->child == 0) {
3302                 signed char res = 0;
3303
3304                 close(h->fd[0]);
3305                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3306
3307                 prctl_set_comment("ctdb_reloadips");
3308                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3309                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3310                         res = -1;
3311                 } else {
3312                         res = ctdb_reloadips_child(ctdb);
3313                         if (res != 0) {
3314                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3315                         }
3316                 }
3317
3318                 sys_write(h->fd[1], &res, 1);
3319                 ctdb_wait_for_process_to_exit(parent);
3320                 _exit(0);
3321         }
3322
3323         h->c             = talloc_steal(h, c);
3324
3325         close(h->fd[1]);
3326         set_close_on_exec(h->fd[0]);
3327
3328         talloc_set_destructor(h, ctdb_reloadips_destructor);
3329
3330
3331         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3332                                ctdb_reloadips_child_handler, (void *)h);
3333         tevent_fd_set_auto_close(h->fde);
3334
3335         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3336                          ctdb_reloadips_timeout_event, h);
3337
3338         /* we reply later */
3339         *async_reply = true;
3340         return 0;
3341 }