ctdb-ipalloc: Do not use node count or PNNs from CTDB context
[obnox/samba/samba-obnox.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Timeout value built from the takeover_timeout tunable via
 * timeval_current_ofs().  The expansion reads a variable named `ctdb`,
 * so one must be in scope wherever this macro is used.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* Seconds component of the delay used when re-arming the ARP timer */
#define CTDB_ARP_INTERVAL 1
/* Number of runs of ctdb_control_send_arp() before its state is freed */
#define CTDB_ARP_REPEAT   3
50
/*
 * One local network interface known to this node.  Entries are kept
 * on the doubly-linked list headed at ctdb->ifaces.
 */
struct ctdb_interface {
	struct ctdb_interface *prev;	/* list linkage */
	struct ctdb_interface *next;	/* list linkage */
	const char *name;		/* interface name */
	bool link_up;			/* false excludes it from selection */
	uint32_t references;		/* VNNs currently assigned to it */
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
/*
 * State for one series of gratuitous ARPs (and TCP tickle ACKs) sent
 * by ctdb_control_send_arp() for a public address.
 */
struct ctdb_takeover_arp {
	struct ctdb_context *ctdb;
	uint32_t count;			/* times the timer handler has run */
	ctdb_sock_addr addr;		/* public address being announced */
	struct ctdb_tcp_array *tcparray; /* connections to tickle, or NULL */
	struct ctdb_vnn *vnn;		/* VNN the address belongs to */
};
292
293
294 /*
295   lists of tcp endpoints
296  */
297 struct ctdb_tcp_list {
298         struct ctdb_tcp_list *prev, *next;
299         struct ctdb_connection connection;
300 };
301
302 /*
303   list of clients to kill on IP release
304  */
305 struct ctdb_client_ip {
306         struct ctdb_client_ip *prev, *next;
307         struct ctdb_context *ctdb;
308         ctdb_sock_addr addr;
309         uint32_t client_id;
310 };
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
/*
 * State handed to release_ip_callback().
 */
struct takeover_callback_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	ctdb_sock_addr *addr;		/* address being released */
	struct ctdb_vnn *vnn;		/* set to NULL once the VNN is deleted */
};
408
/*
 * State handed to ctdb_do_takeip_callback().
 */
struct ctdb_do_takeip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_vnn *vnn;		/* public address being taken over */
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/*
 * State handed to ctdb_do_updateip_callback().
 */
struct ctdb_do_updateip_state {
	struct ctdb_req_control_old *c;	/* control to reply to */
	struct ctdb_interface *old;	/* interface the address is moving away from */
	struct ctdb_vnn *vnn;		/* public address being moved */
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
/*
 * Remove a public address from this node: unlink the VNN from
 * ctdb->vnn, drop its interface assignment, delete any interfaces
 * that no remaining VNN names, and free the VNN.
 */
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
	/* Unlink first so the orphan check below no longer sees this VNN */
	DLIST_REMOVE(ctdb->vnn, vnn);
	ctdb_vnn_unassign_iface(ctdb, vnn);
	ctdb_remove_orphaned_ifaces(ctdb, vnn);
	talloc_free(vnn);
}
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static void *add_ip_callback(void *parm, void *data)
1133 {
1134         struct public_ip_list *this_ip = parm;
1135         struct public_ip_list *prev_ip = data;
1136
1137         if (prev_ip == NULL) {
1138                 return parm;
1139         }
1140         if (this_ip->pnn == -1) {
1141                 this_ip->pnn = prev_ip->pnn;
1142         }
1143
1144         return parm;
1145 }
1146
1147 static int getips_count_callback(void *param, void *data)
1148 {
1149         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1151
1152         new_ip->next = *ip_list;
1153         *ip_list     = new_ip;
1154         return 0;
1155 }
1156
1157 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
1158                                        struct ctdb_public_ip_list *ips,
1159                                        uint32_t pnn);
1160
1161 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1162                                          struct ipalloc_state *ipalloc_state,
1163                                          struct ctdb_node_map_old *nodemap)
1164 {
1165         int j;
1166         int ret;
1167         struct ctdb_public_ip_list_old *ip_list;
1168
1169         if (ipalloc_state->num != nodemap->num) {
1170                 DEBUG(DEBUG_ERR,
1171                       (__location__
1172                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1173                        ipalloc_state->num, nodemap->num));
1174                 return -1;
1175         }
1176
1177         for (j=0; j<nodemap->num; j++) {
1178                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1179                         continue;
1180                 }
1181
1182                 /* Retrieve the list of known public IPs from the node */
1183                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1184                                         TAKEOVER_TIMEOUT(),
1185                                         j,
1186                                         ipalloc_state->known_public_ips,
1187                                         0,
1188                                         &ip_list);
1189                 if (ret != 0) {
1190                         DEBUG(DEBUG_ERR,
1191                               ("Failed to read known public IPs from node: %u\n",
1192                                j));
1193                         return -1;
1194                 }
1195                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1196                 /* This could be copied and freed.  However, ip_list
1197                  * is allocated off ipalloc_state->known_public_ips,
1198                  * so this is a safe hack.  This will go away in a
1199                  * while anyway... */
1200                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1201
1202                 if (ctdb->do_checkpublicip) {
1203                         verify_remote_ip_allocation(
1204                                 ctdb,
1205                                 &ipalloc_state->known_public_ips[j],
1206                                 j);
1207                 }
1208
1209                 /* Retrieve the list of available public IPs from the node */
1210                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1211                                         TAKEOVER_TIMEOUT(),
1212                                         j,
1213                                         ipalloc_state->available_public_ips,
1214                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1215                                         &ip_list);
1216                 if (ret != 0) {
1217                         DEBUG(DEBUG_ERR,
1218                               ("Failed to read available public IPs from node: %u\n",
1219                                j));
1220                         return -1;
1221                 }
1222                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1223                 /* This could be copied and freed.  However, ip_list
1224                  * is allocated off ipalloc_state->available_public_ips,
1225                  * so this is a safe hack.  This will go away in a
1226                  * while anyway... */
1227                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1228         }
1229
1230         return 0;
1231 }
1232
1233 static struct public_ip_list *
1234 create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
1235 {
1236         int i, j;
1237         struct public_ip_list *ip_list;
1238         struct ctdb_public_ip_list *public_ips;
1239
1240         TALLOC_FREE(ctdb->ip_tree);
1241         ctdb->ip_tree = trbt_create(ctdb, 0);
1242
1243         if (ipalloc_state->known_public_ips == NULL) {
1244                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1245                 return NULL;
1246         }
1247
1248         for (i=0; i < ipalloc_state->num; i++) {
1249
1250                 public_ips = &ipalloc_state->known_public_ips[i];
1251
1252                 for (j=0; j < public_ips->num; j++) {
1253                         struct public_ip_list *tmp_ip;
1254
1255                         tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
1256                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1257                         /* Do not use information about IP addresses hosted
1258                          * on other nodes, it may not be accurate */
1259                         if (public_ips->ip[j].pnn == i) {
1260                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1261                         } else {
1262                                 tmp_ip->pnn = -1;
1263                         }
1264                         tmp_ip->addr = public_ips->ip[j].addr;
1265                         tmp_ip->next = NULL;
1266
1267                         trbt_insertarray32_callback(ctdb->ip_tree,
1268                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1269                                 add_ip_callback,
1270                                 tmp_ip);
1271                 }
1272         }
1273
1274         ip_list = NULL;
1275         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1276
1277         return ip_list;
1278 }
1279
1280 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1281 {
1282         int i;
1283
1284         for (i=0;i<nodemap->num;i++) {
1285                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1286                         /* Found one completely healthy node */
1287                         return false;
1288                 }
1289         }
1290
1291         return true;
1292 }
1293
/* State shared by the callbacks that collect one tunable from the nodes */
struct get_tunable_callback_data {
	/* Name of the tunable, used in log messages */
	const char *tunable;
	/* talloc array of results, indexed by the replying node's PNN */
	uint32_t *out;
	/* Set by the callbacks when an error must abort the collection */
	bool fatal;
};
1299
1300 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1301                                  int32_t res, TDB_DATA outdata,
1302                                  void *callback)
1303 {
1304         struct get_tunable_callback_data *cd =
1305                 (struct get_tunable_callback_data *)callback;
1306         int size;
1307
1308         if (res != 0) {
1309                 /* Already handled in fail callback */
1310                 return;
1311         }
1312
1313         if (outdata.dsize != sizeof(uint32_t)) {
1314                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1315                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1316                                  (int)outdata.dsize));
1317                 cd->fatal = true;
1318                 return;
1319         }
1320
1321         size = talloc_array_length(cd->out);
1322         if (pnn >= size) {
1323                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1324                                  cd->tunable, pnn, size));
1325                 return;
1326         }
1327
1328                 
1329         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1330 }
1331
1332 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1333                                        int32_t res, TDB_DATA outdata,
1334                                        void *callback)
1335 {
1336         struct get_tunable_callback_data *cd =
1337                 (struct get_tunable_callback_data *)callback;
1338
1339         switch (res) {
1340         case -ETIME:
1341                 DEBUG(DEBUG_ERR,
1342                       ("Timed out getting tunable \"%s\" from node %d\n",
1343                        cd->tunable, pnn));
1344                 cd->fatal = true;
1345                 break;
1346         case -EINVAL:
1347         case -1:
1348                 DEBUG(DEBUG_WARNING,
1349                       ("Tunable \"%s\" not implemented on node %d\n",
1350                        cd->tunable, pnn));
1351                 break;
1352         default:
1353                 DEBUG(DEBUG_ERR,
1354                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1355                        cd->tunable, pnn));
1356                 cd->fatal = true;
1357         }
1358 }
1359
1360 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1361                                         TALLOC_CTX *tmp_ctx,
1362                                         struct ctdb_node_map_old *nodemap,
1363                                         const char *tunable,
1364                                         uint32_t default_value)
1365 {
1366         TDB_DATA data;
1367         struct ctdb_control_get_tunable *t;
1368         uint32_t *nodes;
1369         uint32_t *tvals;
1370         struct get_tunable_callback_data callback_data;
1371         int i;
1372
1373         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1374         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1375         for (i=0; i<nodemap->num; i++) {
1376                 tvals[i] = default_value;
1377         }
1378                 
1379         callback_data.out = tvals;
1380         callback_data.tunable = tunable;
1381         callback_data.fatal = false;
1382
1383         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1384         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1385         t = (struct ctdb_control_get_tunable *)data.dptr;
1386         t->length = strlen(tunable)+1;
1387         memcpy(t->name, tunable, t->length);
1388         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1389         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1390                                       nodes, 0, TAKEOVER_TIMEOUT(),
1391                                       false, data,
1392                                       get_tunable_callback,
1393                                       get_tunable_fail_callback,
1394                                       &callback_data) != 0) {
1395                 if (callback_data.fatal) {
1396                         talloc_free(tvals);
1397                         tvals = NULL;
1398                 }
1399         }
1400         talloc_free(nodes);
1401         talloc_free(data.dptr);
1402
1403         return tvals;
1404 }
1405
1406 /* Set internal flags for IP allocation:
1407  *   Clear ip flags
1408  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1409  *   Set NOIPHOST ip flag for each INACTIVE node
1410  *   if all nodes are disabled:
1411  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1412  *   else
1413  *     Set NOIPHOST ip flags for disabled nodes
1414  */
1415 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1416                                  struct ctdb_node_map_old *nodemap,
1417                                  uint32_t *tval_noiptakeover,
1418                                  uint32_t *tval_noiphostonalldisabled)
1419 {
1420         int i;
1421
1422         for (i=0;i<nodemap->num;i++) {
1423                 /* Can not take IPs on node with NoIPTakeover set */
1424                 if (tval_noiptakeover[i] != 0) {
1425                         ipalloc_state->noiptakeover[i] = true;
1426                 }
1427
1428                 /* Can not host IPs on INACTIVE node */
1429                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1430                         ipalloc_state->noiphost[i] = true;
1431                 }
1432         }
1433
1434         if (all_nodes_are_disabled(nodemap)) {
1435                 /* If all nodes are disabled, can not host IPs on node
1436                  * with NoIPHostOnAllDisabled set
1437                  */
1438                 for (i=0;i<nodemap->num;i++) {
1439                         if (tval_noiphostonalldisabled[i] != 0) {
1440                                 ipalloc_state->noiphost[i] = true;
1441                         }
1442                 }
1443         } else {
1444                 /* If some nodes are not disabled, then can not host
1445                  * IPs on DISABLED node
1446                  */
1447                 for (i=0;i<nodemap->num;i++) {
1448                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1449                                 ipalloc_state->noiphost[i] = true;
1450                         }
1451                 }
1452         }
1453 }
1454
1455 static bool set_ipflags(struct ctdb_context *ctdb,
1456                         struct ipalloc_state *ipalloc_state,
1457                         struct ctdb_node_map_old *nodemap)
1458 {
1459         uint32_t *tval_noiptakeover;
1460         uint32_t *tval_noiphostonalldisabled;
1461
1462         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1463                                                    "NoIPTakeover", 0);
1464         if (tval_noiptakeover == NULL) {
1465                 return false;
1466         }
1467
1468         tval_noiphostonalldisabled =
1469                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1470                                        "NoIPHostOnAllDisabled", 0);
1471         if (tval_noiphostonalldisabled == NULL) {
1472                 /* Caller frees tmp_ctx */
1473                 return false;
1474         }
1475
1476         set_ipflags_internal(ipalloc_state, nodemap,
1477                              tval_noiptakeover,
1478                              tval_noiphostonalldisabled);
1479
1480         talloc_free(tval_noiptakeover);
1481         talloc_free(tval_noiphostonalldisabled);
1482
1483         return true;
1484 }
1485
1486 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1487                                                  TALLOC_CTX *mem_ctx)
1488 {
1489         struct ipalloc_state *ipalloc_state =
1490                 talloc_zero(mem_ctx, struct ipalloc_state);
1491         if (ipalloc_state == NULL) {
1492                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1493                 return NULL;
1494         }
1495
1496         ipalloc_state->num = ctdb->num_nodes;
1497
1498         ipalloc_state->known_public_ips =
1499                 talloc_zero_array(ipalloc_state,
1500                                   struct ctdb_public_ip_list,
1501                                   ipalloc_state->num);
1502         if (ipalloc_state->known_public_ips == NULL) {
1503                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1504                 goto fail;
1505         }
1506
1507         ipalloc_state->available_public_ips =
1508                 talloc_zero_array(ipalloc_state,
1509                                   struct ctdb_public_ip_list,
1510                                   ipalloc_state->num);
1511         if (ipalloc_state->available_public_ips == NULL) {
1512                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1513                 goto fail;
1514         }
1515         ipalloc_state->noiptakeover =
1516                 talloc_zero_array(ipalloc_state,
1517                                   bool,
1518                                   ipalloc_state->num);
1519         if (ipalloc_state->noiptakeover == NULL) {
1520                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1521                 goto fail;
1522         }
1523         ipalloc_state->noiphost =
1524                 talloc_zero_array(ipalloc_state,
1525                                   bool,
1526                                   ipalloc_state->num);
1527         if (ipalloc_state->noiphost == NULL) {
1528                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1529                 goto fail;
1530         }
1531
1532         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1533                 ipalloc_state->algorithm = IPALLOC_LCP2;
1534         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1535                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1536         } else {
1537                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1538         }
1539
1540         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1541
1542         return ipalloc_state;
1543 fail:
1544         talloc_free(ipalloc_state);
1545         return NULL;
1546 }
1547
/* Per-node failure bookkeeping for a takeover run */
struct takeover_callback_data {
	/* Number of entries in fail_count */
	uint32_t num_nodes;
	/* Failures recorded so far, indexed by PNN */
	unsigned int *fail_count;
};
1552
1553 static struct takeover_callback_data *
1554 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1555                             uint32_t num_nodes)
1556 {
1557         static struct takeover_callback_data *takeover_data;
1558
1559         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1560         if (takeover_data == NULL) {
1561                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1562                 return NULL;
1563         }
1564
1565         takeover_data->fail_count = talloc_zero_array(takeover_data,
1566                                                       unsigned int, num_nodes);
1567         if (takeover_data->fail_count == NULL) {
1568                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1569                 talloc_free(takeover_data);
1570                 return NULL;
1571         }
1572
1573         takeover_data->num_nodes = num_nodes;
1574
1575         return takeover_data;
1576 }
1577
1578 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1579                                        uint32_t node_pnn, int32_t res,
1580                                        TDB_DATA outdata, void *callback_data)
1581 {
1582         struct takeover_callback_data *cd =
1583                 talloc_get_type_abort(callback_data,
1584                                       struct takeover_callback_data);
1585
1586         if (node_pnn >= cd->num_nodes) {
1587                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1588                 return;
1589         }
1590
1591         if (cd->fail_count[node_pnn] == 0) {
1592                 DEBUG(DEBUG_ERR,
1593                       ("Node %u failed the takeover run\n", node_pnn));
1594         }
1595
1596         cd->fail_count[node_pnn]++;
1597 }
1598
1599 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1600                                           struct takeover_callback_data *tcd)
1601 {
1602         unsigned int max_fails = 0;
1603         uint32_t max_pnn = -1;
1604         uint32_t i;
1605
1606         for (i = 0; i < tcd->num_nodes; i++) {
1607                 if (tcd->fail_count[i] > max_fails) {
1608                         max_pnn = i;
1609                         max_fails = tcd->fail_count[i];
1610                 }
1611         }
1612
1613         if (max_fails > 0) {
1614                 int ret;
1615                 TDB_DATA data;
1616
1617                 DEBUG(DEBUG_ERR,
1618                       ("Sending banning credits to %u with fail count %u\n",
1619                        max_pnn, max_fails));
1620
1621                 data.dptr = (uint8_t *)&max_pnn;
1622                 data.dsize = sizeof(uint32_t);
1623                 ret = ctdb_client_send_message(ctdb,
1624                                                CTDB_BROADCAST_CONNECTED,
1625                                                CTDB_SRVID_BANNING,
1626                                                data);
1627                 if (ret != 0) {
1628                         DEBUG(DEBUG_ERR,
1629                               ("Failed to set banning credits for node %u\n",
1630                                max_pnn));
1631                 }
1632         }
1633 }
1634
1635 /*
1636  * Recalculate the allocation of public IPs to nodes and have the
1637  * nodes host their allocated addresses.
1638  *
1639  * - Allocate memory for IP allocation state, including per node
1640  *   arrays
1641  * - Populate IP allocation algorithm in IP allocation state
1642  * - Populate local value of tunable NoIPFailback in IP allocation
1643      state - this is really a cluster-wide configuration variable and
1644      only the value form the master node is used
1645  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1646  *   connected nodes - this is done separately so tunable values can
1647  *   be faked in unit testing
1648  * - Populate NoIPTakover tunable in IP allocation state
1649  * - Populate NoIPHost in IP allocation state, derived from node flags
1650  *   and NoIPHostOnAllDisabled tunable
1651  * - Retrieve and populate known and available IP lists in IP
1652  *   allocation state
1653  * - If no available IP addresses then early exit
1654  * - Build list of (known IPs, currently assigned node)
1655  * - Populate list of nodes to force rebalance - internal structure,
1656  *   currently no way to fetch, only used by LCP2 for nodes that have
1657  *   had new IP addresses added
1658  * - Run IP allocation algorithm
1659  * - Send RELEASE_IP to all nodes for IPs they should not host
1660  * - Send TAKE_IP to all nodes for IPs they should host
1661  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1662  */
1663 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1664                       uint32_t *force_rebalance_nodes)
1665 {
1666         int i, ret;
1667         struct ctdb_public_ip ip;
1668         uint32_t *nodes;
1669         struct public_ip_list *all_ips, *tmp_ip;
1670         TDB_DATA data;
1671         struct timeval timeout;
1672         struct client_async_data *async_data;
1673         struct ctdb_client_control_state *state;
1674         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1675         struct ipalloc_state *ipalloc_state;
1676         struct takeover_callback_data *takeover_data;
1677         bool can_host_ips;
1678
1679         /* Initialise fail callback data to be used with
1680          * takeover_run_fail_callback().  A failure in any of the
1681          * following steps will cause an early return, so this can be
1682          * reused for each of those steps without re-initialising. */
1683         takeover_data = takeover_callback_data_init(tmp_ctx,
1684                                                     nodemap->num);
1685         if (takeover_data == NULL) {
1686                 talloc_free(tmp_ctx);
1687                 return -1;
1688         }
1689
1690         /*
1691          * ip failover is completely disabled, just send out the 
1692          * ipreallocated event.
1693          */
1694         if (ctdb->tunable.disable_ip_failover != 0) {
1695                 goto ipreallocated;
1696         }
1697
1698         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1699         if (ipalloc_state == NULL) {
1700                 talloc_free(tmp_ctx);
1701                 return -1;
1702         }
1703
1704         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1705                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1706                 talloc_free(tmp_ctx);
1707                 return -1;
1708         }
1709
1710         /* Fetch known/available public IPs from each active node */
1711         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1712         if (ret != 0) {
1713                 talloc_free(tmp_ctx);
1714                 return -1;
1715         }
1716
1717         /* Short-circuit IP allocation if no node has available IPs */
1718         can_host_ips = false;
1719         for (i=0; i < ipalloc_state->num; i++) {
1720                 if (ipalloc_state->available_public_ips[i].num != 0) {
1721                         can_host_ips = true;
1722                 }
1723         }
1724         if (!can_host_ips) {
1725                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1726                 goto ipreallocated;
1727         }
1728
1729         /* since nodes only know about those public addresses that
1730            can be served by that particular node, no single node has
1731            a full list of all public addresses that exist in the cluster.
1732            Walk over all node structures and create a merged list of
1733            all public addresses that exist in the cluster.
1734
1735            keep the tree of ips around as ctdb->ip_tree
1736         */
1737         all_ips = create_merged_ip_list(ctdb, ipalloc_state);
1738         ipalloc_state->all_ips = all_ips;
1739
1740         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1741
1742         /* Do the IP reassignment calculations */
1743         ipalloc(ipalloc_state);
1744
1745         /* Now tell all nodes to release any public IPs should not
1746          * host.  This will be a NOOP on nodes that don't currently
1747          * hold the given IP.
1748          */
1749         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1750         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1751
1752         async_data->fail_callback = takeover_run_fail_callback;
1753         async_data->callback_data = takeover_data;
1754
1755         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1756
1757         /* Send a RELEASE_IP to all nodes that should not be hosting
1758          * each IP.  For each IP, all but one of these will be
1759          * redundant.  However, the redundant ones are used to tell
1760          * nodes which node should be hosting the IP so that commands
1761          * like "ctdb ip" can display a particular nodes idea of who
1762          * is hosting what. */
1763         for (i=0;i<nodemap->num;i++) {
1764                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1765                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1766                         continue;
1767                 }
1768
1769                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1770                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1771                                 /* This node should be serving this
1772                                    vnn so don't tell it to release the ip
1773                                 */
1774                                 continue;
1775                         }
1776                         ip.pnn  = tmp_ip->pnn;
1777                         ip.addr = tmp_ip->addr;
1778
1779                         timeout = TAKEOVER_TIMEOUT();
1780                         data.dsize = sizeof(ip);
1781                         data.dptr  = (uint8_t *)&ip;
1782                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1783                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1784                                                   data, async_data,
1785                                                   &timeout, NULL);
1786                         if (state == NULL) {
1787                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1788                                 talloc_free(tmp_ctx);
1789                                 return -1;
1790                         }
1791
1792                         ctdb_client_async_add(async_data, state);
1793                 }
1794         }
1795         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1796                 DEBUG(DEBUG_ERR,
1797                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1798                 goto fail;
1799         }
1800         talloc_free(async_data);
1801
1802
1803         /* For each IP, send a TAKOVER_IP to the node that should be
1804          * hosting it.  Many of these will often be redundant (since
1805          * the allocation won't have changed) but they can be useful
1806          * to recover from inconsistencies. */
1807         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1808         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1809
1810         async_data->fail_callback = takeover_run_fail_callback;
1811         async_data->callback_data = takeover_data;
1812
1813         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1814                 if (tmp_ip->pnn == -1) {
1815                         /* this IP won't be taken over */
1816                         continue;
1817                 }
1818
1819                 ip.pnn  = tmp_ip->pnn;
1820                 ip.addr = tmp_ip->addr;
1821
1822                 timeout = TAKEOVER_TIMEOUT();
1823                 data.dsize = sizeof(ip);
1824                 data.dptr  = (uint8_t *)&ip;
1825                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1826                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1827                                           data, async_data, &timeout, NULL);
1828                 if (state == NULL) {
1829                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1830                         talloc_free(tmp_ctx);
1831                         return -1;
1832                 }
1833
1834                 ctdb_client_async_add(async_data, state);
1835         }
1836         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1837                 DEBUG(DEBUG_ERR,
1838                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1839                 goto fail;
1840         }
1841
1842 ipreallocated:
1843         /*
1844          * Tell all nodes to run eventscripts to process the
1845          * "ipreallocated" event.  This can do a lot of things,
1846          * including restarting services to reconfigure them if public
1847          * IPs have moved.  Once upon a time this event only used to
1848          * update natgw.
1849          */
1850         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1851         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1852                                         nodes, 0, TAKEOVER_TIMEOUT(),
1853                                         false, tdb_null,
1854                                         NULL, takeover_run_fail_callback,
1855                                         takeover_data);
1856         if (ret != 0) {
1857                 DEBUG(DEBUG_ERR,
1858                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1859                 goto fail;
1860         }
1861
1862         talloc_free(tmp_ctx);
1863         return ret;
1864
1865 fail:
1866         takeover_run_process_failures(ctdb, takeover_data);
1867         talloc_free(tmp_ctx);
1868         return -1;
1869 }
1870
1871
1872 /*
1873   destroy a ctdb_client_ip structure
1874  */
1875 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1876 {
1877         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1878                 ctdb_addr_to_str(&ip->addr),
1879                 ntohs(ip->addr.ip.sin_port),
1880                 ip->client_id));
1881
1882         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1883         return 0;
1884 }
1885
1886 /*
1887   called by a client to inform us of a TCP connection that it is managing
1888   that should tickled with an ACK when IP takeover is done
1889  */
1890 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1891                                 TDB_DATA indata)
1892 {
1893         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1894         struct ctdb_connection *tcp_sock = NULL;
1895         struct ctdb_tcp_list *tcp;
1896         struct ctdb_connection t;
1897         int ret;
1898         TDB_DATA data;
1899         struct ctdb_client_ip *ip;
1900         struct ctdb_vnn *vnn;
1901         ctdb_sock_addr addr;
1902
1903         /* If we don't have public IPs, tickles are useless */
1904         if (ctdb->vnn == NULL) {
1905                 return 0;
1906         }
1907
1908         tcp_sock = (struct ctdb_connection *)indata.dptr;
1909
1910         addr = tcp_sock->src;
1911         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1912         addr = tcp_sock->dst;
1913         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1914
1915         ZERO_STRUCT(addr);
1916         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1917         vnn = find_public_ip_vnn(ctdb, &addr);
1918         if (vnn == NULL) {
1919                 switch (addr.sa.sa_family) {
1920                 case AF_INET:
1921                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1922                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1923                                         ctdb_addr_to_str(&addr)));
1924                         }
1925                         break;
1926                 case AF_INET6:
1927                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1928                                 ctdb_addr_to_str(&addr)));
1929                         break;
1930                 default:
1931                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1932                 }
1933
1934                 return 0;
1935         }
1936
1937         if (vnn->pnn != ctdb->pnn) {
1938                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1939                         ctdb_addr_to_str(&addr),
1940                         client_id, client->pid));
1941                 /* failing this call will tell smbd to die */
1942                 return -1;
1943         }
1944
1945         ip = talloc(client, struct ctdb_client_ip);
1946         CTDB_NO_MEMORY(ctdb, ip);
1947
1948         ip->ctdb      = ctdb;
1949         ip->addr      = addr;
1950         ip->client_id = client_id;
1951         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1952         DLIST_ADD(ctdb->client_ip_list, ip);
1953
1954         tcp = talloc(client, struct ctdb_tcp_list);
1955         CTDB_NO_MEMORY(ctdb, tcp);
1956
1957         tcp->connection.src = tcp_sock->src;
1958         tcp->connection.dst = tcp_sock->dst;
1959
1960         DLIST_ADD(client->tcp_list, tcp);
1961
1962         t.src = tcp_sock->src;
1963         t.dst = tcp_sock->dst;
1964
1965         data.dptr = (uint8_t *)&t;
1966         data.dsize = sizeof(t);
1967
1968         switch (addr.sa.sa_family) {
1969         case AF_INET:
1970                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1971                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1972                         ctdb_addr_to_str(&tcp_sock->src),
1973                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1974                 break;
1975         case AF_INET6:
1976                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1977                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1978                         ctdb_addr_to_str(&tcp_sock->src),
1979                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1980                 break;
1981         default:
1982                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1983         }
1984
1985
1986         /* tell all nodes about this tcp connection */
1987         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1988                                        CTDB_CONTROL_TCP_ADD,
1989                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1990         if (ret != 0) {
1991                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1992                 return -1;
1993         }
1994
1995         return 0;
1996 }
1997
1998 /*
1999   find a tcp address on a list
2000  */
2001 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2002                                            struct ctdb_connection *tcp)
2003 {
2004         int i;
2005
2006         if (array == NULL) {
2007                 return NULL;
2008         }
2009
2010         for (i=0;i<array->num;i++) {
2011                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2012                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2013                         return &array->connections[i];
2014                 }
2015         }
2016         return NULL;
2017 }
2018
2019
2020
2021 /*
2022   called by a daemon to inform us of a TCP connection that one of its
2023   clients managing that should tickled with an ACK when IP takeover is
2024   done
2025  */
2026 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2027 {
2028         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2029         struct ctdb_tcp_array *tcparray;
2030         struct ctdb_connection tcp;
2031         struct ctdb_vnn *vnn;
2032
2033         /* If we don't have public IPs, tickles are useless */
2034         if (ctdb->vnn == NULL) {
2035                 return 0;
2036         }
2037
2038         vnn = find_public_ip_vnn(ctdb, &p->dst);
2039         if (vnn == NULL) {
2040                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2041                         ctdb_addr_to_str(&p->dst)));
2042
2043                 return -1;
2044         }
2045
2046
2047         tcparray = vnn->tcp_array;
2048
2049         /* If this is the first tickle */
2050         if (tcparray == NULL) {
2051                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2052                 CTDB_NO_MEMORY(ctdb, tcparray);
2053                 vnn->tcp_array = tcparray;
2054
2055                 tcparray->num = 0;
2056                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2057                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2058
2059                 tcparray->connections[tcparray->num].src = p->src;
2060                 tcparray->connections[tcparray->num].dst = p->dst;
2061                 tcparray->num++;
2062
2063                 if (tcp_update_needed) {
2064                         vnn->tcp_update_needed = true;
2065                 }
2066                 return 0;
2067         }
2068
2069
2070         /* Do we already have this tickle ?*/
2071         tcp.src = p->src;
2072         tcp.dst = p->dst;
2073         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2074                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2075                         ctdb_addr_to_str(&tcp.dst),
2076                         ntohs(tcp.dst.ip.sin_port),
2077                         vnn->pnn));
2078                 return 0;
2079         }
2080
2081         /* A new tickle, we must add it to the array */
2082         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2083                                         struct ctdb_connection,
2084                                         tcparray->num+1);
2085         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2086
2087         tcparray->connections[tcparray->num].src = p->src;
2088         tcparray->connections[tcparray->num].dst = p->dst;
2089         tcparray->num++;
2090
2091         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2092                 ctdb_addr_to_str(&tcp.dst),
2093                 ntohs(tcp.dst.ip.sin_port),
2094                 vnn->pnn));
2095
2096         if (tcp_update_needed) {
2097                 vnn->tcp_update_needed = true;
2098         }
2099
2100         return 0;
2101 }
2102
2103
2104 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2105 {
2106         struct ctdb_connection *tcpp;
2107
2108         if (vnn == NULL) {
2109                 return;
2110         }
2111
2112         /* if the array is empty we cant remove it
2113            and we don't need to do anything
2114          */
2115         if (vnn->tcp_array == NULL) {
2116                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2117                         ctdb_addr_to_str(&conn->dst),
2118                         ntohs(conn->dst.ip.sin_port)));
2119                 return;
2120         }
2121
2122
2123         /* See if we know this connection
2124            if we don't know this connection  then we dont need to do anything
2125          */
2126         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2127         if (tcpp == NULL) {
2128                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2129                         ctdb_addr_to_str(&conn->dst),
2130                         ntohs(conn->dst.ip.sin_port)));
2131                 return;
2132         }
2133
2134
2135         /* We need to remove this entry from the array.
2136            Instead of allocating a new array and copying data to it
2137            we cheat and just copy the last entry in the existing array
2138            to the entry that is to be removed and just shring the 
2139            ->num field
2140          */
2141         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2142         vnn->tcp_array->num--;
2143
2144         /* If we deleted the last entry we also need to remove the entire array
2145          */
2146         if (vnn->tcp_array->num == 0) {
2147                 talloc_free(vnn->tcp_array);
2148                 vnn->tcp_array = NULL;
2149         }               
2150
2151         vnn->tcp_update_needed = true;
2152
2153         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2154                 ctdb_addr_to_str(&conn->src),
2155                 ntohs(conn->src.ip.sin_port)));
2156 }
2157
2158
2159 /*
2160   called by a daemon to inform us of a TCP connection that one of its
2161   clients used are no longer needed in the tickle database
2162  */
2163 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2164 {
2165         struct ctdb_vnn *vnn;
2166         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2167
2168         /* If we don't have public IPs, tickles are useless */
2169         if (ctdb->vnn == NULL) {
2170                 return 0;
2171         }
2172
2173         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2174         if (vnn == NULL) {
2175                 DEBUG(DEBUG_ERR,
2176                       (__location__ " unable to find public address %s\n",
2177                        ctdb_addr_to_str(&conn->dst)));
2178                 return 0;
2179         }
2180
2181         ctdb_remove_connection(vnn, conn);
2182
2183         return 0;
2184 }
2185
2186
2187 /*
2188   Called when another daemon starts - causes all tickles for all
2189   public addresses we are serving to be sent to the new node on the
2190   next check.  This actually causes the next scheduled call to
2191   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2192   doesn't require careful error handling.
2193  */
2194 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2195 {
2196         struct ctdb_vnn *vnn;
2197
2198         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2199                            (unsigned long) pnn));
2200
2201         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2202                 vnn->tcp_update_needed = true;
2203         }
2204
2205         return 0;
2206 }
2207
2208
2209 /*
2210   called when a client structure goes away - hook to remove
2211   elements from the tcp_list in all daemons
2212  */
2213 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2214 {
2215         while (client->tcp_list) {
2216                 struct ctdb_vnn *vnn;
2217                 struct ctdb_tcp_list *tcp = client->tcp_list;
2218                 struct ctdb_connection *conn = &tcp->connection;
2219
2220                 DLIST_REMOVE(client->tcp_list, tcp);
2221
2222                 vnn = find_public_ip_vnn(client->ctdb,
2223                                          &conn->dst);
2224                 if (vnn == NULL) {
2225                         DEBUG(DEBUG_ERR,
2226                               (__location__ " unable to find public address %s\n",
2227                                ctdb_addr_to_str(&conn->dst)));
2228                         continue;
2229                 }
2230
2231                 /* If the IP address is hosted on this node then
2232                  * remove the connection. */
2233                 if (vnn->pnn == client->ctdb->pnn) {
2234                         ctdb_remove_connection(vnn, conn);
2235                 }
2236
2237                 /* Otherwise this function has been called because the
2238                  * server IP address has been released to another node
2239                  * and the client has exited.  This means that we
2240                  * should not delete the connection information.  The
2241                  * takeover node processes connections too. */
2242         }
2243 }
2244
2245
2246 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2247 {
2248         struct ctdb_vnn *vnn;
2249         int count = 0;
2250         TDB_DATA data;
2251
2252         if (ctdb->tunable.disable_ip_failover == 1) {
2253                 return;
2254         }
2255
2256         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2257                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2258                         ctdb_vnn_unassign_iface(ctdb, vnn);
2259                         continue;
2260                 }
2261                 if (!vnn->iface) {
2262                         continue;
2263                 }
2264
2265                 /* Don't allow multiple releases at once.  Some code,
2266                  * particularly ctdb_tickle_sentenced_connections() is
2267                  * not re-entrant */
2268                 if (vnn->update_in_flight) {
2269                         DEBUG(DEBUG_WARNING,
2270                               (__location__
2271                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2272                                     ctdb_addr_to_str(&vnn->public_address),
2273                                     vnn->public_netmask_bits,
2274                                     ctdb_vnn_iface_string(vnn)));
2275                         continue;
2276                 }
2277                 vnn->update_in_flight = true;
2278
2279                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2280                                     ctdb_addr_to_str(&vnn->public_address),
2281                                     vnn->public_netmask_bits,
2282                                     ctdb_vnn_iface_string(vnn)));
2283
2284                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2285                                   ctdb_vnn_iface_string(vnn),
2286                                   ctdb_addr_to_str(&vnn->public_address),
2287                                   vnn->public_netmask_bits);
2288
2289                 data.dptr = (uint8_t *)talloc_strdup(
2290                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2291                 if (data.dptr != NULL) {
2292                         data.dsize = strlen((char *)data.dptr) + 1;
2293                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2294                                                  CTDB_SRVID_RELEASE_IP, data);
2295                         talloc_free(data.dptr);
2296                 }
2297
2298                 ctdb_vnn_unassign_iface(ctdb, vnn);
2299                 vnn->update_in_flight = false;
2300                 count++;
2301         }
2302
2303         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2304 }
2305
2306
2307 /*
2308   get list of public IPs
2309  */
2310 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2311                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2312 {
2313         int i, num, len;
2314         struct ctdb_public_ip_list_old *ips;
2315         struct ctdb_vnn *vnn;
2316         bool only_available = false;
2317
2318         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2319                 only_available = true;
2320         }
2321
2322         /* count how many public ip structures we have */
2323         num = 0;
2324         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2325                 num++;
2326         }
2327
2328         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2329                 num*sizeof(struct ctdb_public_ip);
2330         ips = talloc_zero_size(outdata, len);
2331         CTDB_NO_MEMORY(ctdb, ips);
2332
2333         i = 0;
2334         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2335                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2336                         continue;
2337                 }
2338                 ips->ips[i].pnn  = vnn->pnn;
2339                 ips->ips[i].addr = vnn->public_address;
2340                 i++;
2341         }
2342         ips->num = i;
2343         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2344                 i*sizeof(struct ctdb_public_ip);
2345
2346         outdata->dsize = len;
2347         outdata->dptr  = (uint8_t *)ips;
2348
2349         return 0;
2350 }
2351
2352
2353 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2354                                         struct ctdb_req_control_old *c,
2355                                         TDB_DATA indata,
2356                                         TDB_DATA *outdata)
2357 {
2358         int i, num, len;
2359         ctdb_sock_addr *addr;
2360         struct ctdb_public_ip_info_old *info;
2361         struct ctdb_vnn *vnn;
2362
2363         addr = (ctdb_sock_addr *)indata.dptr;
2364
2365         vnn = find_public_ip_vnn(ctdb, addr);
2366         if (vnn == NULL) {
2367                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2368                                  "'%s'not a public address\n",
2369                                  ctdb_addr_to_str(addr)));
2370                 return -1;
2371         }
2372
2373         /* count how many public ip structures we have */
2374         num = 0;
2375         for (;vnn->ifaces[num];) {
2376                 num++;
2377         }
2378
2379         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2380                 num*sizeof(struct ctdb_iface);
2381         info = talloc_zero_size(outdata, len);
2382         CTDB_NO_MEMORY(ctdb, info);
2383
2384         info->ip.addr = vnn->public_address;
2385         info->ip.pnn = vnn->pnn;
2386         info->active_idx = 0xFFFFFFFF;
2387
2388         for (i=0; vnn->ifaces[i]; i++) {
2389                 struct ctdb_interface *cur;
2390
2391                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2392                 if (cur == NULL) {
2393                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2394                                            vnn->ifaces[i]));
2395                         return -1;
2396                 }
2397                 if (vnn->iface == cur) {
2398                         info->active_idx = i;
2399                 }
2400                 strncpy(info->ifaces[i].name, cur->name,
2401                         sizeof(info->ifaces[i].name));
2402                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2403                 info->ifaces[i].link_state = cur->link_up;
2404                 info->ifaces[i].references = cur->references;
2405         }
2406         info->num = i;
2407         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2408                 i*sizeof(struct ctdb_iface);
2409
2410         outdata->dsize = len;
2411         outdata->dptr  = (uint8_t *)info;
2412
2413         return 0;
2414 }
2415
2416 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2417                                 struct ctdb_req_control_old *c,
2418                                 TDB_DATA *outdata)
2419 {
2420         int i, num, len;
2421         struct ctdb_iface_list_old *ifaces;
2422         struct ctdb_interface *cur;
2423
2424         /* count how many public ip structures we have */
2425         num = 0;
2426         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2427                 num++;
2428         }
2429
2430         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2431                 num*sizeof(struct ctdb_iface);
2432         ifaces = talloc_zero_size(outdata, len);
2433         CTDB_NO_MEMORY(ctdb, ifaces);
2434
2435         i = 0;
2436         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2437                 strncpy(ifaces->ifaces[i].name, cur->name,
2438                         sizeof(ifaces->ifaces[i].name));
2439                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2440                 ifaces->ifaces[i].link_state = cur->link_up;
2441                 ifaces->ifaces[i].references = cur->references;
2442                 i++;
2443         }
2444         ifaces->num = i;
2445         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2446                 i*sizeof(struct ctdb_iface);
2447
2448         outdata->dsize = len;
2449         outdata->dptr  = (uint8_t *)ifaces;
2450
2451         return 0;
2452 }
2453
2454 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2455                                     struct ctdb_req_control_old *c,
2456                                     TDB_DATA indata)
2457 {
2458         struct ctdb_iface *info;
2459         struct ctdb_interface *iface;
2460         bool link_up = false;
2461
2462         info = (struct ctdb_iface *)indata.dptr;
2463
2464         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2465                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2466                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2467                                   len, len, info->name));
2468                 return -1;
2469         }
2470
2471         switch (info->link_state) {
2472         case 0:
2473                 link_up = false;
2474                 break;
2475         case 1:
2476                 link_up = true;
2477                 break;
2478         default:
2479                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2480                                   (unsigned int)info->link_state));
2481                 return -1;
2482         }
2483
2484         if (info->references != 0) {
2485                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2486                                   (unsigned int)info->references));
2487                 return -1;
2488         }
2489
2490         iface = ctdb_find_iface(ctdb, info->name);
2491         if (iface == NULL) {
2492                 return -1;
2493         }
2494
2495         if (link_up == iface->link_up) {
2496                 return 0;
2497         }
2498
2499         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2500               ("iface[%s] has changed it's link status %s => %s\n",
2501                iface->name,
2502                iface->link_up?"up":"down",
2503                link_up?"up":"down"));
2504
2505         iface->link_up = link_up;
2506         return 0;
2507 }
2508
2509
2510 /*
2511   called by a daemon to inform us of the entire list of TCP tickles for
2512   a particular public address.
2513   this control should only be sent by the node that is currently serving
2514   that public address.
2515  */
2516 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2517 {
2518         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2519         struct ctdb_tcp_array *tcparray;
2520         struct ctdb_vnn *vnn;
2521
2522         /* We must at least have tickles.num or else we cant verify the size
2523            of the received data blob
2524          */
2525         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2526                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2527                 return -1;
2528         }
2529
2530         /* verify that the size of data matches what we expect */
2531         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2532                          + sizeof(struct ctdb_connection) * list->num) {
2533                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2534                 return -1;
2535         }
2536
2537         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2538                            ctdb_addr_to_str(&list->addr)));
2539
2540         vnn = find_public_ip_vnn(ctdb, &list->addr);
2541         if (vnn == NULL) {
2542                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2543                         ctdb_addr_to_str(&list->addr)));
2544
2545                 return 1;
2546         }
2547
2548         if (vnn->pnn == ctdb->pnn) {
2549                 DEBUG(DEBUG_INFO,
2550                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2551                        ctdb_addr_to_str(&list->addr)));
2552                 return 0;
2553         }
2554
2555         /* remove any old ticklelist we might have */
2556         talloc_free(vnn->tcp_array);
2557         vnn->tcp_array = NULL;
2558
2559         tcparray = talloc(vnn, struct ctdb_tcp_array);
2560         CTDB_NO_MEMORY(ctdb, tcparray);
2561
2562         tcparray->num = list->num;
2563
2564         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2565         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2566
2567         memcpy(tcparray->connections, &list->connections[0],
2568                sizeof(struct ctdb_connection)*tcparray->num);
2569
2570         /* We now have a new fresh tickle list array for this vnn */
2571         vnn->tcp_array = tcparray;
2572
2573         return 0;
2574 }
2575
2576 /*
2577   called to return the full list of tickles for the puclic address associated 
2578   with the provided vnn
2579  */
2580 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2581 {
2582         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2583         struct ctdb_tickle_list_old *list;
2584         struct ctdb_tcp_array *tcparray;
2585         int num, i;
2586         struct ctdb_vnn *vnn;
2587         unsigned port;
2588
2589         vnn = find_public_ip_vnn(ctdb, addr);
2590         if (vnn == NULL) {
2591                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2592                         ctdb_addr_to_str(addr)));
2593
2594                 return 1;
2595         }
2596
2597         port = ctdb_addr_to_port(addr);
2598
2599         tcparray = vnn->tcp_array;
2600         num = 0;
2601         if (tcparray != NULL) {
2602                 if (port == 0) {
2603                         /* All connections */
2604                         num = tcparray->num;
2605                 } else {
2606                         /* Count connections for port */
2607                         for (i = 0; i < tcparray->num; i++) {
2608                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2609                                         num++;
2610                                 }
2611                         }
2612                 }
2613         }
2614
2615         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2616                         + sizeof(struct ctdb_connection) * num;
2617
2618         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2619         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2620         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2621
2622         list->addr = *addr;
2623         list->num = num;
2624
2625         if (num == 0) {
2626                 return 0;
2627         }
2628
2629         num = 0;
2630         for (i = 0; i < tcparray->num; i++) {
2631                 if (port == 0 || \
2632                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2633                         list->connections[num] = tcparray->connections[i];
2634                         num++;
2635                 }
2636         }
2637
2638         return 0;
2639 }
2640
2641
2642 /*
2643   set the list of all tcp tickles for a public address
2644  */
2645 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2646                                             ctdb_sock_addr *addr,
2647                                             struct ctdb_tcp_array *tcparray)
2648 {
2649         int ret, num;
2650         TDB_DATA data;
2651         struct ctdb_tickle_list_old *list;
2652
2653         if (tcparray) {
2654                 num = tcparray->num;
2655         } else {
2656                 num = 0;
2657         }
2658
2659         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2660                         sizeof(struct ctdb_connection) * num;
2661         data.dptr = talloc_size(ctdb, data.dsize);
2662         CTDB_NO_MEMORY(ctdb, data.dptr);
2663
2664         list = (struct ctdb_tickle_list_old *)data.dptr;
2665         list->addr = *addr;
2666         list->num = num;
2667         if (tcparray) {
2668                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2669         }
2670
2671         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2672                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2673                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2674         if (ret != 0) {
2675                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2676                 return -1;
2677         }
2678
2679         talloc_free(data.dptr);
2680
2681         return ret;
2682 }
2683
2684
2685 /*
2686   perform tickle updates if required
2687  */
2688 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2689                                     struct tevent_timer *te,
2690                                     struct timeval t, void *private_data)
2691 {
2692         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2693         int ret;
2694         struct ctdb_vnn *vnn;
2695
2696         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2697                 /* we only send out updates for public addresses that 
2698                    we have taken over
2699                  */
2700                 if (ctdb->pnn != vnn->pnn) {
2701                         continue;
2702                 }
2703                 /* We only send out the updates if we need to */
2704                 if (!vnn->tcp_update_needed) {
2705                         continue;
2706                 }
2707                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2708                                                        &vnn->public_address,
2709                                                        vnn->tcp_array);
2710                 if (ret != 0) {
2711                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2712                                 ctdb_addr_to_str(&vnn->public_address)));
2713                 } else {
2714                         DEBUG(DEBUG_INFO,
2715                               ("Sent tickle update for public address %s\n",
2716                                ctdb_addr_to_str(&vnn->public_address)));
2717                         vnn->tcp_update_needed = false;
2718                 }
2719         }
2720
2721         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2722                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2723                          ctdb_update_tcp_tickles, ctdb);
2724 }
2725
2726 /*
2727   start periodic update of tcp tickles
2728  */
2729 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2730 {
2731         ctdb->tickle_update_context = talloc_new(ctdb);
2732
2733         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2734                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2735                          ctdb_update_tcp_tickles, ctdb);
2736 }
2737
2738
2739
2740
/*
  state of one series of gratuitous arps, passed as private data to
  the send_gratious_arp timer handler
 */
struct control_gratious_arp {
        /* daemon context, provides the event context for the timers */
        struct ctdb_context *ctdb;
        /* address to send the arp for */
        ctdb_sock_addr addr;
        /* name of the interface to send on */
        const char *iface;
        /* number of sends done so far, compared with CTDB_ARP_REPEAT */
        int count;
};
2747
2748 /*
2749   send a control_gratuitous arp
2750  */
2751 static void send_gratious_arp(struct tevent_context *ev,
2752                               struct tevent_timer *te,
2753                               struct timeval t, void *private_data)
2754 {
2755         int ret;
2756         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2757                                                         struct control_gratious_arp);
2758
2759         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2760         if (ret != 0) {
2761                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2762                                  arp->iface, strerror(errno)));
2763         }
2764
2765
2766         arp->count++;
2767         if (arp->count == CTDB_ARP_REPEAT) {
2768                 talloc_free(arp);
2769                 return;
2770         }
2771
2772         tevent_add_timer(arp->ctdb->ev, arp,
2773                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2774                          send_gratious_arp, arp);
2775 }
2776
2777
2778 /*
2779   send a gratious arp 
2780  */
2781 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2782 {
2783         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2784         struct control_gratious_arp *arp;
2785
2786         /* verify the size of indata */
2787         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2788                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2789                                  (unsigned)indata.dsize, 
2790                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2791                 return -1;
2792         }
2793         if (indata.dsize != 
2794                 ( offsetof(struct ctdb_addr_info_old, iface)
2795                 + gratious_arp->len ) ){
2796
2797                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2798                         "but should be %u bytes\n", 
2799                          (unsigned)indata.dsize, 
2800                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2801                 return -1;
2802         }
2803
2804
2805         arp = talloc(ctdb, struct control_gratious_arp);
2806         CTDB_NO_MEMORY(ctdb, arp);
2807
2808         arp->ctdb  = ctdb;
2809         arp->addr   = gratious_arp->addr;
2810         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2811         CTDB_NO_MEMORY(ctdb, arp->iface);
2812         arp->count = 0;
2813
2814         tevent_add_timer(arp->ctdb->ev, arp,
2815                          timeval_zero(), send_gratious_arp, arp);
2816
2817         return 0;
2818 }
2819
2820 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2821 {
2822         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2823         int ret;
2824
2825         /* verify the size of indata */
2826         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2827                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2828                 return -1;
2829         }
2830         if (indata.dsize != 
2831                 ( offsetof(struct ctdb_addr_info_old, iface)
2832                 + pub->len ) ){
2833
2834                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2835                         "but should be %u bytes\n", 
2836                          (unsigned)indata.dsize, 
2837                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2838                 return -1;
2839         }
2840
2841         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2842
2843         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2844
2845         if (ret != 0) {
2846                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2847                 return -1;
2848         }
2849
2850         return 0;
2851 }
2852
/*
  private data handed to delete_ip_callback
 */
struct delete_ip_callback_state {
        /* the control request that delete_ip_callback replies to */
        struct ctdb_req_control_old *c;
};
2856
2857 /*
2858   called when releaseip event finishes for del_public_address
2859  */
2860 static void delete_ip_callback(struct ctdb_context *ctdb,
2861                                int32_t status, TDB_DATA data,
2862                                const char *errormsg,
2863                                void *private_data)
2864 {
2865         struct delete_ip_callback_state *state =
2866                 talloc_get_type(private_data, struct delete_ip_callback_state);
2867
2868         /* If release failed then fail. */
2869         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2870         talloc_free(private_data);
2871 }
2872
2873 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2874                                         struct ctdb_req_control_old *c,
2875                                         TDB_DATA indata, bool *async_reply)
2876 {
2877         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2878         struct ctdb_vnn *vnn;
2879
2880         /* verify the size of indata */
2881         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2882                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2883                 return -1;
2884         }
2885         if (indata.dsize != 
2886                 ( offsetof(struct ctdb_addr_info_old, iface)
2887                 + pub->len ) ){
2888
2889                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2890                         "but should be %u bytes\n", 
2891                          (unsigned)indata.dsize, 
2892                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2893                 return -1;
2894         }
2895
2896         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2897
2898         /* walk over all public addresses until we find a match */
2899         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2900                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2901                         if (vnn->pnn == ctdb->pnn) {
2902                                 struct delete_ip_callback_state *state;
2903                                 struct ctdb_public_ip *ip;
2904                                 TDB_DATA data;
2905                                 int ret;
2906
2907                                 vnn->delete_pending = true;
2908
2909                                 state = talloc(ctdb,
2910                                                struct delete_ip_callback_state);
2911                                 CTDB_NO_MEMORY(ctdb, state);
2912                                 state->c = c;
2913
2914                                 ip = talloc(state, struct ctdb_public_ip);
2915                                 if (ip == NULL) {
2916                                         DEBUG(DEBUG_ERR,
2917                                               (__location__ " Out of memory\n"));
2918                                         talloc_free(state);
2919                                         return -1;
2920                                 }
2921                                 ip->pnn = -1;
2922                                 ip->addr = pub->addr;
2923
2924                                 data.dsize = sizeof(struct ctdb_public_ip);
2925                                 data.dptr = (unsigned char *)ip;
2926
2927                                 ret = ctdb_daemon_send_control(ctdb,
2928                                                                ctdb_get_pnn(ctdb),
2929                                                                0,
2930                                                                CTDB_CONTROL_RELEASE_IP,
2931                                                                0, 0,
2932                                                                data,
2933                                                                delete_ip_callback,
2934                                                                state);
2935                                 if (ret == -1) {
2936                                         DEBUG(DEBUG_ERR,
2937                                               (__location__ "Unable to send "
2938                                                "CTDB_CONTROL_RELEASE_IP\n"));
2939                                         talloc_free(state);
2940                                         return -1;
2941                                 }
2942
2943                                 state->c = talloc_steal(state, c);
2944                                 *async_reply = true;
2945                         } else {
2946                                 /* This IP is not hosted on the
2947                                  * current node so just delete it
2948                                  * now. */
2949                                 do_delete_ip(ctdb, vnn);
2950                         }
2951
2952                         return 0;
2953                 }
2954         }
2955
2956         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2957                          ctdb_addr_to_str(&pub->addr)));
2958         return -1;
2959 }
2960
2961
/*
  private data handed to ctdb_ipreallocated_callback
 */
struct ipreallocated_callback_state {
        /* the control request that ctdb_ipreallocated_callback replies to */
        struct ctdb_req_control_old *c;
};
2965
2966 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2967                                         int status, void *p)
2968 {
2969         struct ipreallocated_callback_state *state =
2970                 talloc_get_type(p, struct ipreallocated_callback_state);
2971
2972         if (status != 0) {
2973                 DEBUG(DEBUG_ERR,
2974                       (" \"ipreallocated\" event script failed (status %d)\n",
2975                        status));
2976                 if (status == -ETIME) {
2977                         ctdb_ban_self(ctdb);
2978                 }
2979         }
2980
2981         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2982         talloc_free(state);
2983 }
2984
2985 /* A control to run the ipreallocated event */
2986 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2987                                    struct ctdb_req_control_old *c,
2988                                    bool *async_reply)
2989 {
2990         int ret;
2991         struct ipreallocated_callback_state *state;
2992
2993         state = talloc(ctdb, struct ipreallocated_callback_state);
2994         CTDB_NO_MEMORY(ctdb, state);
2995
2996         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2997
2998         ret = ctdb_event_script_callback(ctdb, state,
2999                                          ctdb_ipreallocated_callback, state,
3000                                          CTDB_EVENT_IPREALLOCATED,
3001                                          "%s", "");
3002
3003         if (ret != 0) {
3004                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3005                 talloc_free(state);
3006                 return -1;
3007         }
3008
3009         /* tell the control that we will be reply asynchronously */
3010         state->c    = talloc_steal(state, c);
3011         *async_reply = true;
3012
3013         return 0;
3014 }
3015
3016
3017 /* This function is called from the recovery daemon to verify that a remote
3018    node has the expected ip allocation.
3019    This is verified against ctdb->ip_tree
3020 */
3021 static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
3022                                        struct ctdb_public_ip_list *ips,
3023                                        uint32_t pnn)
3024 {
3025         struct public_ip_list *tmp_ip;
3026         int i;
3027
3028         if (ctdb->ip_tree == NULL) {
3029                 /* don't know the expected allocation yet, assume remote node
3030                    is correct. */
3031                 return 0;
3032         }
3033
3034         if (ips == NULL) {
3035                 return 0;
3036         }
3037
3038         for (i=0; i<ips->num; i++) {
3039                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
3040                 if (tmp_ip == NULL) {
3041                         DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
3042                         return -1;
3043                 }
3044
3045                 if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
3046                         continue;
3047                 }
3048
3049                 if (tmp_ip->pnn != ips->ip[i].pnn) {
3050                         DEBUG(DEBUG_ERR,
3051                               ("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
3052                                pnn,
3053                                ctdb_addr_to_str(&ips->ip[i].addr),
3054                                ips->ip[i].pnn, tmp_ip->pnn));
3055                         return -1;
3056                 }
3057         }
3058
3059         return 0;
3060 }
3061
3062 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
3063 {
3064         struct public_ip_list *tmp_ip;
3065
3066         /* IP tree is never built if DisableIPFailover is set */
3067         if (ctdb->tunable.disable_ip_failover != 0) {
3068                 return 0;
3069         }
3070
3071         if (ctdb->ip_tree == NULL) {
3072                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
3073                 return -1;
3074         }
3075
3076         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
3077         if (tmp_ip == NULL) {
3078                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
3079                 return -1;
3080         }
3081
3082         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
3083         tmp_ip->pnn = ip->pnn;
3084
3085         return 0;
3086 }
3087
3088 void clear_ip_assignment_tree(struct ctdb_context *ctdb)
3089 {
3090         TALLOC_FREE(ctdb->ip_tree);
3091 }
3092
/*
  state of one running "reload ips" child process
 */
struct ctdb_reloadips_handle {
        /* daemon context */
        struct ctdb_context *ctdb;
        /* control request, replied to with status by the destructor */
        struct ctdb_req_control_old *c;
        /* reply status, set by ctdb_reloadips_child_handler */
        int status;
        /* fd[0] is read by ctdb_reloadips_child_handler for the
           one byte result; presumably a pipe to the child - the
           setup is not part of this section */
        int fd[2];
        /* pid that the destructor sends SIGKILL to */
        pid_t child;
        /* presumably the fd event for fd[0] - not used in this section */
        struct tevent_fd *fde;
};
3101
3102 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3103 {
3104         if (h == h->ctdb->reload_ips) {
3105                 h->ctdb->reload_ips = NULL;
3106         }
3107         if (h->c != NULL) {
3108                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3109                 h->c = NULL;
3110         }
3111         ctdb_kill(h->ctdb, h->child, SIGKILL);
3112         return 0;
3113 }
3114
3115 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3116                                          struct tevent_timer *te,
3117                                          struct timeval t, void *private_data)
3118 {
3119         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3120
3121         talloc_free(h);
3122 }
3123
3124 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3125                                          struct tevent_fd *fde,
3126                                          uint16_t flags, void *private_data)
3127 {
3128         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3129
3130         char res;
3131         int ret;
3132
3133         ret = sys_read(h->fd[0], &res, 1);
3134         if (ret < 1 || res != 0) {
3135                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3136                 res = 1;
3137         }
3138         h->status = res;
3139
3140         talloc_free(h);
3141 }
3142
3143 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3144 {
3145         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3146         struct ctdb_public_ip_list_old *ips;
3147         struct ctdb_vnn *vnn;
3148         struct client_async_data *async_data;
3149         struct timeval timeout;
3150         TDB_DATA data;
3151         struct ctdb_client_control_state *state;
3152         bool first_add;
3153         int i, ret;
3154
3155         CTDB_NO_MEMORY(ctdb, mem_ctx);
3156
3157         /* Read IPs from local node */
3158         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3159                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3160         if (ret != 0) {
3161                 DEBUG(DEBUG_ERR,
3162                       ("Unable to fetch public IPs from local node\n"));
3163                 talloc_free(mem_ctx);
3164                 return -1;
3165         }
3166
3167         /* Read IPs file - this is safe since this is a child process */
3168         ctdb->vnn = NULL;
3169         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3170                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3171                 talloc_free(mem_ctx);
3172                 return -1;
3173         }
3174
3175         async_data = talloc_zero(mem_ctx, struct client_async_data);
3176         CTDB_NO_MEMORY(ctdb, async_data);
3177
3178         /* Compare IPs between node and file for IPs to be deleted */
3179         for (i = 0; i < ips->num; i++) {
3180                 /* */
3181                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3182                         if (ctdb_same_ip(&vnn->public_address,
3183                                          &ips->ips[i].addr)) {
3184                                 /* IP is still in file */
3185                                 break;
3186                         }
3187                 }
3188
3189                 if (vnn == NULL) {
3190                         /* Delete IP ips->ips[i] */
3191                         struct ctdb_addr_info_old *pub;
3192
3193                         DEBUG(DEBUG_NOTICE,
3194                               ("IP %s no longer configured, deleting it\n",
3195                                ctdb_addr_to_str(&ips->ips[i].addr)));
3196
3197                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3198                         CTDB_NO_MEMORY(ctdb, pub);
3199
3200                         pub->addr  = ips->ips[i].addr;
3201                         pub->mask  = 0;
3202                         pub->len   = 0;
3203
3204                         timeout = TAKEOVER_TIMEOUT();
3205
3206                         data.dsize = offsetof(struct ctdb_addr_info_old,
3207                                               iface) + pub->len;
3208                         data.dptr = (uint8_t *)pub;
3209
3210                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3211                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3212                                                   0, data, async_data,
3213                                                   &timeout, NULL);
3214                         if (state == NULL) {
3215                                 DEBUG(DEBUG_ERR,
3216                                       (__location__
3217                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3218                                 goto failed;
3219                         }
3220
3221                         ctdb_client_async_add(async_data, state);
3222                 }
3223         }
3224
3225         /* Compare IPs between node and file for IPs to be added */
3226         first_add = true;
3227         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3228                 for (i = 0; i < ips->num; i++) {
3229                         if (ctdb_same_ip(&vnn->public_address,
3230                                          &ips->ips[i].addr)) {
3231                                 /* IP already on node */
3232                                 break;
3233                         }
3234                 }
3235                 if (i == ips->num) {
3236                         /* Add IP ips->ips[i] */
3237                         struct ctdb_addr_info_old *pub;
3238                         const char *ifaces = NULL;
3239                         uint32_t len;
3240                         int iface = 0;
3241
3242                         DEBUG(DEBUG_NOTICE,
3243                               ("New IP %s configured, adding it\n",
3244                                ctdb_addr_to_str(&vnn->public_address)));
3245                         if (first_add) {
3246                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3247
3248                                 data.dsize = sizeof(pnn);
3249                                 data.dptr  = (uint8_t *)&pnn;
3250
3251                                 ret = ctdb_client_send_message(
3252                                         ctdb,
3253                                         CTDB_BROADCAST_CONNECTED,
3254                                         CTDB_SRVID_REBALANCE_NODE,
3255                                         data);
3256                                 if (ret != 0) {
3257                                         DEBUG(DEBUG_WARNING,
3258                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3259                                 }
3260
3261                                 first_add = false;
3262                         }
3263
3264                         ifaces = vnn->ifaces[0];
3265                         iface = 1;
3266                         while (vnn->ifaces[iface] != NULL) {
3267                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3268                                                          vnn->ifaces[iface]);
3269                                 iface++;
3270                         }
3271
3272                         len   = strlen(ifaces) + 1;
3273                         pub = talloc_zero_size(mem_ctx,
3274                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3275                         CTDB_NO_MEMORY(ctdb, pub);
3276
3277                         pub->addr  = vnn->public_address;
3278                         pub->mask  = vnn->public_netmask_bits;
3279                         pub->len   = len;
3280                         memcpy(&pub->iface[0], ifaces, pub->len);
3281
3282                         timeout = TAKEOVER_TIMEOUT();
3283
3284                         data.dsize = offsetof(struct ctdb_addr_info_old,
3285                                               iface) + pub->len;
3286                         data.dptr = (uint8_t *)pub;
3287
3288                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3289                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3290                                                   0, data, async_data,
3291                                                   &timeout, NULL);
3292                         if (state == NULL) {
3293                                 DEBUG(DEBUG_ERR,
3294                                       (__location__
3295                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3296                                 goto failed;
3297                         }
3298
3299                         ctdb_client_async_add(async_data, state);
3300                 }
3301         }
3302
3303         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3304                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3305                 goto failed;
3306         }
3307
3308         talloc_free(mem_ctx);
3309         return 0;
3310
3311 failed:
3312         talloc_free(mem_ctx);
3313         return -1;
3314 }
3315
3316 /* This control is sent to force the node to re-read the public addresses file
3317    and drop any addresses we should nnot longer host, and add new addresses
3318    that we are now able to host
3319 */
3320 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3321 {
3322         struct ctdb_reloadips_handle *h;
3323         pid_t parent = getpid();
3324
3325         if (ctdb->reload_ips != NULL) {
3326                 talloc_free(ctdb->reload_ips);
3327                 ctdb->reload_ips = NULL;
3328         }
3329
3330         h = talloc(ctdb, struct ctdb_reloadips_handle);
3331         CTDB_NO_MEMORY(ctdb, h);
3332         h->ctdb     = ctdb;
3333         h->c        = NULL;
3334         h->status   = -1;
3335         
3336         if (pipe(h->fd) == -1) {
3337                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3338                 talloc_free(h);
3339                 return -1;
3340         }
3341
3342         h->child = ctdb_fork(ctdb);
3343         if (h->child == (pid_t)-1) {
3344                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3345                 close(h->fd[0]);
3346                 close(h->fd[1]);
3347                 talloc_free(h);
3348                 return -1;
3349         }
3350
3351         /* child process */
3352         if (h->child == 0) {
3353                 signed char res = 0;
3354
3355                 close(h->fd[0]);
3356                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3357
3358                 prctl_set_comment("ctdb_reloadips");
3359                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3360                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3361                         res = -1;
3362                 } else {
3363                         res = ctdb_reloadips_child(ctdb);
3364                         if (res != 0) {
3365                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3366                         }
3367                 }
3368
3369                 sys_write(h->fd[1], &res, 1);
3370                 ctdb_wait_for_process_to_exit(parent);
3371                 _exit(0);
3372         }
3373
3374         h->c             = talloc_steal(h, c);
3375
3376         close(h->fd[1]);
3377         set_close_on_exec(h->fd[0]);
3378
3379         talloc_set_destructor(h, ctdb_reloadips_destructor);
3380
3381
3382         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3383                                ctdb_reloadips_child_handler, (void *)h);
3384         tevent_fd_set_auto_close(h->fde);
3385
3386         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3387                          ctdb_reloadips_timeout_event, h);
3388
3389         /* we reply later */
3390         *async_reply = true;
3391         return 0;
3392 }