e0c0ca110e3fa217c32695066520c5e53d5caa99
[obnox/samba/samba-obnox.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/* Timeout value derived from the takeover_timeout tunable.  Expands to a
 * reference to a variable named `ctdb`, so it can only be used where one
 * is in scope.  Presumably "now + takeover_timeout seconds" - confirm
 * against timeval_current_ofs(). */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument given to timeval_current_ofs() when the gratuitous ARP
 * timer is re-armed (presumably seconds - confirm). */
#define CTDB_ARP_INTERVAL 1
/* Number of ARP rounds after which the pending ARP state is freed. */
#define CTDB_ARP_REPEAT   3
50
/*
 * One local network interface known to this daemon.  Entries are kept
 * on the ctdb->ifaces list.
 */
struct ctdb_interface {
        struct ctdb_interface *prev, *next; /* linkage for ctdb->ifaces */
        const char *name;                   /* interface name, talloc child of this entry */
        bool link_up;                       /* set to true on creation; only up interfaces are picked for IPs */
        uint32_t references;                /* number of VNNs currently assigned to this interface */
};
57
58 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
59 {
60         if (vnn->iface) {
61                 return vnn->iface->name;
62         }
63
64         return "__none__";
65 }
66
67 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
68 {
69         struct ctdb_interface *i;
70
71         if (strlen(iface) > CTDB_IFACE_SIZE) {
72                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
73                 return -1;
74         }
75
76         /* Verify that we don't have an entry for this ip yet */
77         for (i=ctdb->ifaces;i;i=i->next) {
78                 if (strcmp(i->name, iface) == 0) {
79                         return 0;
80                 }
81         }
82
83         /* create a new structure for this interface */
84         i = talloc_zero(ctdb, struct ctdb_interface);
85         CTDB_NO_MEMORY_FATAL(ctdb, i);
86         i->name = talloc_strdup(i, iface);
87         CTDB_NO_MEMORY(ctdb, i->name);
88
89         i->link_up = true;
90
91         DLIST_ADD(ctdb->ifaces, i);
92
93         return 0;
94 }
95
96 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
97                                         const char *name)
98 {
99         int n;
100
101         for (n = 0; vnn->ifaces[n] != NULL; n++) {
102                 if (strcmp(name, vnn->ifaces[n]) == 0) {
103                         return true;
104                 }
105         }
106
107         return false;
108 }
109
110 /* If any interfaces now have no possible IPs then delete them.  This
111  * implementation is naive (i.e. simple) rather than clever
112  * (i.e. complex).  Given that this is run on delip and that operation
113  * is rare, this doesn't need to be efficient - it needs to be
114  * foolproof.  One alternative is reference counting, where the logic
115  * is distributed and can, therefore, be broken in multiple places.
116  * Another alternative is to build a red-black tree of interfaces that
117  * can have addresses (by walking ctdb->vnn once) and then walking
118  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
119  * one of those if the naive implementation causes problems...  :-)
120  */
121 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
122                                         struct ctdb_vnn *vnn)
123 {
124         struct ctdb_interface *i, *next;
125
126         /* For each interface, check if there's an IP using it. */
127         for (i = ctdb->ifaces; i != NULL; i = next) {
128                 struct ctdb_vnn *tv;
129                 bool found;
130                 next = i->next;
131
132                 /* Only consider interfaces named in the given VNN. */
133                 if (!vnn_has_interface_with_name(vnn, i->name)) {
134                         continue;
135                 }
136
137                 /* Search for a vnn with this interface. */
138                 found = false;
139                 for (tv=ctdb->vnn; tv; tv=tv->next) {
140                         if (vnn_has_interface_with_name(tv, i->name)) {
141                                 found = true;
142                                 break;
143                         }
144                 }
145
146                 if (!found) {
147                         /* None of the VNNs are using this interface. */
148                         DLIST_REMOVE(ctdb->ifaces, i);
149                         talloc_free(i);
150                 }
151         }
152 }
153
154
155 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
156                                               const char *iface)
157 {
158         struct ctdb_interface *i;
159
160         for (i=ctdb->ifaces;i;i=i->next) {
161                 if (strcmp(i->name, iface) == 0) {
162                         return i;
163                 }
164         }
165
166         return NULL;
167 }
168
169 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
170                                                   struct ctdb_vnn *vnn)
171 {
172         int i;
173         struct ctdb_interface *cur = NULL;
174         struct ctdb_interface *best = NULL;
175
176         for (i=0; vnn->ifaces[i]; i++) {
177
178                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
179                 if (cur == NULL) {
180                         continue;
181                 }
182
183                 if (!cur->link_up) {
184                         continue;
185                 }
186
187                 if (best == NULL) {
188                         best = cur;
189                         continue;
190                 }
191
192                 if (cur->references < best->references) {
193                         best = cur;
194                         continue;
195                 }
196         }
197
198         return best;
199 }
200
201 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
202                                      struct ctdb_vnn *vnn)
203 {
204         struct ctdb_interface *best = NULL;
205
206         if (vnn->iface) {
207                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
208                                    "still assigned to iface '%s'\n",
209                                    ctdb_addr_to_str(&vnn->public_address),
210                                    ctdb_vnn_iface_string(vnn)));
211                 return 0;
212         }
213
214         best = ctdb_vnn_best_iface(ctdb, vnn);
215         if (best == NULL) {
216                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
217                                   "cannot assign to iface any iface\n",
218                                   ctdb_addr_to_str(&vnn->public_address)));
219                 return -1;
220         }
221
222         vnn->iface = best;
223         best->references++;
224         vnn->pnn = ctdb->pnn;
225
226         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
227                            "now assigned to iface '%s' refs[%d]\n",
228                            ctdb_addr_to_str(&vnn->public_address),
229                            ctdb_vnn_iface_string(vnn),
230                            best->references));
231         return 0;
232 }
233
234 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
235                                     struct ctdb_vnn *vnn)
236 {
237         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
238                            "now unassigned (old iface '%s' refs[%d])\n",
239                            ctdb_addr_to_str(&vnn->public_address),
240                            ctdb_vnn_iface_string(vnn),
241                            vnn->iface?vnn->iface->references:0));
242         if (vnn->iface) {
243                 vnn->iface->references--;
244         }
245         vnn->iface = NULL;
246         if (vnn->pnn == ctdb->pnn) {
247                 vnn->pnn = -1;
248         }
249 }
250
251 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
252                                struct ctdb_vnn *vnn)
253 {
254         int i;
255
256         /* Nodes that are not RUNNING can not host IPs */
257         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
258                 return false;
259         }
260
261         if (vnn->delete_pending) {
262                 return false;
263         }
264
265         if (vnn->iface && vnn->iface->link_up) {
266                 return true;
267         }
268
269         for (i=0; vnn->ifaces[i]; i++) {
270                 struct ctdb_interface *cur;
271
272                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
273                 if (cur == NULL) {
274                         continue;
275                 }
276
277                 if (cur->link_up) {
278                         return true;
279                 }
280         }
281
282         return false;
283 }
284
/*
 * State carried across the repeated gratuitous ARP / TCP tickle timer
 * callbacks that follow a takeover or interface move.
 */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;       /* daemon context; supplies the event context for re-arming */
        uint32_t count;                  /* rounds sent so far; state is freed at CTDB_ARP_REPEAT */
        ctdb_sock_addr addr;             /* copy of the VNN's public address to announce */
        struct ctdb_tcp_array *tcparray; /* connections taken over from vnn->tcp_array, may be NULL */
        struct ctdb_vnn *vnn;            /* VNN being announced */
};
292
293
/*
  lists of tcp endpoints
 */
struct ctdb_tcp_list {
        struct ctdb_tcp_list *prev, *next; /* list linkage (presumably for the DLIST_* macros) */
        struct ctdb_connection connection; /* the tcp connection this entry describes */
};
301
/*
  list of clients to kill on IP release
 */
struct ctdb_client_ip {
        struct ctdb_client_ip *prev, *next; /* list linkage (presumably for the DLIST_* macros) */
        struct ctdb_context *ctdb;          /* owning daemon context */
        ctdb_sock_addr addr;                /* address the client is associated with */
        uint32_t client_id;                 /* id of the client - TODO confirm how it is resolved */
};
311
312
313 /*
314   send a gratuitous arp
315  */
316 static void ctdb_control_send_arp(struct tevent_context *ev,
317                                   struct tevent_timer *te,
318                                   struct timeval t, void *private_data)
319 {
320         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
321                                                         struct ctdb_takeover_arp);
322         int i, ret;
323         struct ctdb_tcp_array *tcparray;
324         const char *iface = ctdb_vnn_iface_string(arp->vnn);
325
326         ret = ctdb_sys_send_arp(&arp->addr, iface);
327         if (ret != 0) {
328                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
329                                   iface, strerror(errno)));
330         }
331
332         tcparray = arp->tcparray;
333         if (tcparray) {
334                 for (i=0;i<tcparray->num;i++) {
335                         struct ctdb_connection *tcon;
336
337                         tcon = &tcparray->connections[i];
338                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
339                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
340                                 ctdb_addr_to_str(&tcon->src),
341                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
342                         ret = ctdb_sys_send_tcp(
343                                 &tcon->src,
344                                 &tcon->dst,
345                                 0, 0, 0);
346                         if (ret != 0) {
347                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
348                                         ctdb_addr_to_str(&tcon->src)));
349                         }
350                 }
351         }
352
353         arp->count++;
354
355         if (arp->count == CTDB_ARP_REPEAT) {
356                 talloc_free(arp);
357                 return;
358         }
359
360         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
361                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
362                          ctdb_control_send_arp, arp);
363 }
364
365 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
366                                        struct ctdb_vnn *vnn)
367 {
368         struct ctdb_takeover_arp *arp;
369         struct ctdb_tcp_array *tcparray;
370
371         if (!vnn->takeover_ctx) {
372                 vnn->takeover_ctx = talloc_new(vnn);
373                 if (!vnn->takeover_ctx) {
374                         return -1;
375                 }
376         }
377
378         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
379         if (!arp) {
380                 return -1;
381         }
382
383         arp->ctdb = ctdb;
384         arp->addr = vnn->public_address;
385         arp->vnn  = vnn;
386
387         tcparray = vnn->tcp_array;
388         if (tcparray) {
389                 /* add all of the known tcp connections for this IP to the
390                    list of tcp connections to send tickle acks for */
391                 arp->tcparray = talloc_steal(arp, tcparray);
392
393                 vnn->tcp_array = NULL;
394                 vnn->tcp_update_needed = true;
395         }
396
397         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
398                          timeval_zero(), ctdb_control_send_arp, arp);
399
400         return 0;
401 }
402
/* State handed to release_ip_callback() once the releaseip event finishes. */
struct takeover_callback_state {
        struct ctdb_req_control_old *c; /* control request that gets the reply */
        ctdb_sock_addr *addr;           /* address being released */
        struct ctdb_vnn *vnn;           /* VNN being released; set to NULL once the VNN is deleted */
};
408
/* State handed to ctdb_do_takeip_callback() once the takeip event finishes. */
struct ctdb_do_takeip_state {
        struct ctdb_req_control_old *c; /* control request that gets the reply */
        struct ctdb_vnn *vnn;           /* VNN being taken over; update_in_flight is cleared when this state is freed */
};
413
414 /*
415   called when takeip event finishes
416  */
417 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
418                                     void *private_data)
419 {
420         struct ctdb_do_takeip_state *state =
421                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
422         int32_t ret;
423         TDB_DATA data;
424
425         if (status != 0) {
426                 if (status == -ETIME) {
427                         ctdb_ban_self(ctdb);
428                 }
429                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
430                                  ctdb_addr_to_str(&state->vnn->public_address),
431                                  ctdb_vnn_iface_string(state->vnn)));
432                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
433
434                 talloc_free(state);
435                 return;
436         }
437
438         if (ctdb->do_checkpublicip) {
439
440         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
441         if (ret != 0) {
442                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
443                 talloc_free(state);
444                 return;
445         }
446
447         }
448
449         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
450         data.dsize = strlen((char *)data.dptr) + 1;
451         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
452
453         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
454
455
456         /* the control succeeded */
457         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
458         talloc_free(state);
459         return;
460 }
461
462 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
463 {
464         state->vnn->update_in_flight = false;
465         return 0;
466 }
467
468 /*
469   take over an ip address
470  */
471 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
472                               struct ctdb_req_control_old *c,
473                               struct ctdb_vnn *vnn)
474 {
475         int ret;
476         struct ctdb_do_takeip_state *state;
477
478         if (vnn->update_in_flight) {
479                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
480                                     "update for this IP already in flight\n",
481                                     ctdb_addr_to_str(&vnn->public_address),
482                                     vnn->public_netmask_bits));
483                 return -1;
484         }
485
486         ret = ctdb_vnn_assign_iface(ctdb, vnn);
487         if (ret != 0) {
488                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
489                                  "assign a usable interface\n",
490                                  ctdb_addr_to_str(&vnn->public_address),
491                                  vnn->public_netmask_bits));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_takeip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->vnn   = vnn;
500
501         vnn->update_in_flight = true;
502         talloc_set_destructor(state, ctdb_takeip_destructor);
503
504         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
505                             ctdb_addr_to_str(&vnn->public_address),
506                             vnn->public_netmask_bits,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_takeip_callback,
512                                          state,
513                                          CTDB_EVENT_TAKE_IP,
514                                          "%s %s %u",
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
521                         ctdb_addr_to_str(&vnn->public_address),
522                         ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
/* State handed to ctdb_do_updateip_callback() once the updateip event finishes. */
struct ctdb_do_updateip_state {
        struct ctdb_req_control_old *c; /* control request that gets the reply */
        struct ctdb_interface *old;     /* interface the address is moving away from; restored on failure */
        struct ctdb_vnn *vnn;           /* VNN being moved; update_in_flight is cleared when this state is freed */
};
535
536 /*
537   called when updateip event finishes
538  */
539 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
540                                       void *private_data)
541 {
542         struct ctdb_do_updateip_state *state =
543                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
544         int32_t ret;
545
546         if (status != 0) {
547                 if (status == -ETIME) {
548                         ctdb_ban_self(ctdb);
549                 }
550                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
551                         ctdb_addr_to_str(&state->vnn->public_address),
552                         state->old->name,
553                         ctdb_vnn_iface_string(state->vnn)));
554
555                 /*
556                  * All we can do is reset the old interface
557                  * and let the next run fix it
558                  */
559                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
560                 state->vnn->iface = state->old;
561                 state->vnn->iface->references++;
562
563                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
564                 talloc_free(state);
565                 return;
566         }
567
568         if (ctdb->do_checkpublicip) {
569
570         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
571         if (ret != 0) {
572                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
573                 talloc_free(state);
574                 return;
575         }
576
577         }
578
579         /* the control succeeded */
580         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
581         talloc_free(state);
582         return;
583 }
584
585 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
586 {
587         state->vnn->update_in_flight = false;
588         return 0;
589 }
590
591 /*
592   update (move) an ip address
593  */
594 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
595                                 struct ctdb_req_control_old *c,
596                                 struct ctdb_vnn *vnn)
597 {
598         int ret;
599         struct ctdb_do_updateip_state *state;
600         struct ctdb_interface *old = vnn->iface;
601         const char *new_name;
602
603         if (vnn->update_in_flight) {
604                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
605                                     "update for this IP already in flight\n",
606                                     ctdb_addr_to_str(&vnn->public_address),
607                                     vnn->public_netmask_bits));
608                 return -1;
609         }
610
611         ctdb_vnn_unassign_iface(ctdb, vnn);
612         ret = ctdb_vnn_assign_iface(ctdb, vnn);
613         if (ret != 0) {
614                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
615                                  "assin a usable interface (old iface '%s')\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                  vnn->public_netmask_bits,
618                                  old->name));
619                 return -1;
620         }
621
622         new_name = ctdb_vnn_iface_string(vnn);
623         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
624                 /* A benign update from one interface onto itself.
625                  * no need to run the eventscripts in this case, just return
626                  * success.
627                  */
628                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
629                 return 0;
630         }
631
632         state = talloc(vnn, struct ctdb_do_updateip_state);
633         CTDB_NO_MEMORY(ctdb, state);
634
635         state->c = talloc_steal(ctdb, c);
636         state->old = old;
637         state->vnn = vnn;
638
639         vnn->update_in_flight = true;
640         talloc_set_destructor(state, ctdb_updateip_destructor);
641
642         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
643                             "interface %s to %s\n",
644                             ctdb_addr_to_str(&vnn->public_address),
645                             vnn->public_netmask_bits,
646                             old->name,
647                             new_name));
648
649         ret = ctdb_event_script_callback(ctdb,
650                                          state,
651                                          ctdb_do_updateip_callback,
652                                          state,
653                                          CTDB_EVENT_UPDATE_IP,
654                                          "%s %s %s %u",
655                                          state->old->name,
656                                          new_name,
657                                          ctdb_addr_to_str(&vnn->public_address),
658                                          vnn->public_netmask_bits);
659         if (ret != 0) {
660                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
661                                  ctdb_addr_to_str(&vnn->public_address),
662                                  old->name, new_name));
663                 talloc_free(state);
664                 return -1;
665         }
666
667         return 0;
668 }
669
670 /*
671   Find the vnn of the node that has a public ip address
672   returns -1 if the address is not known as a public address
673  */
674 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
675 {
676         struct ctdb_vnn *vnn;
677
678         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
679                 if (ctdb_same_ip(&vnn->public_address, addr)) {
680                         return vnn;
681                 }
682         }
683
684         return NULL;
685 }
686
687 /*
688   take over an ip address
689  */
690 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
691                                  struct ctdb_req_control_old *c,
692                                  TDB_DATA indata,
693                                  bool *async_reply)
694 {
695         int ret;
696         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
697         struct ctdb_vnn *vnn;
698         bool have_ip = false;
699         bool do_updateip = false;
700         bool do_takeip = false;
701         struct ctdb_interface *best_iface = NULL;
702
703         if (pip->pnn != ctdb->pnn) {
704                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
705                                  "with pnn %d, but we're node %d\n",
706                                  ctdb_addr_to_str(&pip->addr),
707                                  pip->pnn, ctdb->pnn));
708                 return -1;
709         }
710
711         /* update out vnn list */
712         vnn = find_public_ip_vnn(ctdb, &pip->addr);
713         if (vnn == NULL) {
714                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
715                         ctdb_addr_to_str(&pip->addr)));
716                 return 0;
717         }
718
719         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
720                 have_ip = ctdb_sys_have_ip(&pip->addr);
721         }
722         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
723         if (best_iface == NULL) {
724                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
725                                  "a usable interface (old %s, have_ip %d)\n",
726                                  ctdb_addr_to_str(&vnn->public_address),
727                                  vnn->public_netmask_bits,
728                                  ctdb_vnn_iface_string(vnn),
729                                  have_ip));
730                 return -1;
731         }
732
733         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
734                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
735                 have_ip = false;
736         }
737
738
739         if (vnn->iface == NULL && have_ip) {
740                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
741                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
742                                  ctdb_addr_to_str(&vnn->public_address)));
743                 return 0;
744         }
745
746         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
747                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
748                                   "and we have it on iface[%s], but it was assigned to node %d"
749                                   "and we are node %d, banning ourself\n",
750                                  ctdb_addr_to_str(&vnn->public_address),
751                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
752                 ctdb_ban_self(ctdb);
753                 return -1;
754         }
755
756         if (vnn->pnn == -1 && have_ip) {
757                 vnn->pnn = ctdb->pnn;
758                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
759                                   "and we already have it on iface[%s], update local daemon\n",
760                                  ctdb_addr_to_str(&vnn->public_address),
761                                   ctdb_vnn_iface_string(vnn)));
762                 return 0;
763         }
764
765         if (vnn->iface) {
766                 if (vnn->iface != best_iface) {
767                         if (!vnn->iface->link_up) {
768                                 do_updateip = true;
769                         } else if (vnn->iface->references > (best_iface->references + 1)) {
770                                 /* only move when the rebalance gains something */
771                                         do_updateip = true;
772                         }
773                 }
774         }
775
776         if (!have_ip) {
777                 if (do_updateip) {
778                         ctdb_vnn_unassign_iface(ctdb, vnn);
779                         do_updateip = false;
780                 }
781                 do_takeip = true;
782         }
783
784         if (do_takeip) {
785                 ret = ctdb_do_takeip(ctdb, c, vnn);
786                 if (ret != 0) {
787                         return -1;
788                 }
789         } else if (do_updateip) {
790                 ret = ctdb_do_updateip(ctdb, c, vnn);
791                 if (ret != 0) {
792                         return -1;
793                 }
794         } else {
795                 /*
796                  * The interface is up and the kernel known the ip
797                  * => do nothing
798                  */
799                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
800                         ctdb_addr_to_str(&pip->addr),
801                         vnn->public_netmask_bits,
802                         ctdb_vnn_iface_string(vnn)));
803                 return 0;
804         }
805
806         /* tell ctdb_control.c that we will be replying asynchronously */
807         *async_reply = true;
808
809         return 0;
810 }
811
/*
 * Remove a public address from this node for good and free its VNN.
 * The statement order matters; see the comments below.
 */
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
        /* Unlink first so the orphan scan over ctdb->vnn below no
         * longer counts this VNN as a user of its interfaces. */
        DLIST_REMOVE(ctdb->vnn, vnn);
        /* Drop the reference held on the assigned interface, if any. */
        ctdb_vnn_unassign_iface(ctdb, vnn);
        /* Needs vnn->ifaces, so it must run before the VNN is freed. */
        ctdb_remove_orphaned_ifaces(ctdb, vnn);
        talloc_free(vnn);
}
819
820 /*
821   called when releaseip event finishes
822  */
823 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
824                                 void *private_data)
825 {
826         struct takeover_callback_state *state = 
827                 talloc_get_type(private_data, struct takeover_callback_state);
828         TDB_DATA data;
829
830         if (status == -ETIME) {
831                 ctdb_ban_self(ctdb);
832         }
833
834         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
835                 if  (ctdb_sys_have_ip(state->addr)) {
836                         DEBUG(DEBUG_ERR,
837                               ("IP %s still hosted during release IP callback, failing\n",
838                                ctdb_addr_to_str(state->addr)));
839                         ctdb_request_control_reply(ctdb, state->c,
840                                                    NULL, -1, NULL);
841                         talloc_free(state);
842                         return;
843                 }
844         }
845
846         /* send a message to all clients of this node telling them
847            that the cluster has been reconfigured and they should
848            release any sockets on this IP */
849         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
850         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
851         data.dsize = strlen((char *)data.dptr)+1;
852
853         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
854
855         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
856
857         ctdb_vnn_unassign_iface(ctdb, state->vnn);
858
859         /* Process the IP if it has been marked for deletion */
860         if (state->vnn->delete_pending) {
861                 do_delete_ip(ctdb, state->vnn);
862                 state->vnn = NULL;
863         }
864
865         /* the control succeeded */
866         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
867         talloc_free(state);
868 }
869
870 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
871 {
872         if (state->vnn != NULL) {
873                 state->vnn->update_in_flight = false;
874         }
875         return 0;
876 }
877
878 /*
879   release an ip address
880  */
881 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
882                                 struct ctdb_req_control_old *c,
883                                 TDB_DATA indata, 
884                                 bool *async_reply)
885 {
886         int ret;
887         struct takeover_callback_state *state;
888         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
889         struct ctdb_vnn *vnn;
890         char *iface;
891
892         /* update our vnn list */
893         vnn = find_public_ip_vnn(ctdb, &pip->addr);
894         if (vnn == NULL) {
895                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
896                         ctdb_addr_to_str(&pip->addr)));
897                 return 0;
898         }
899         vnn->pnn = pip->pnn;
900
901         /* stop any previous arps */
902         talloc_free(vnn->takeover_ctx);
903         vnn->takeover_ctx = NULL;
904
905         /* Some ctdb tool commands (e.g. moveip) send
906          * lazy multicast to drop an IP from any node that isn't the
907          * intended new node.  The following causes makes ctdbd ignore
908          * a release for any address it doesn't host.
909          */
910         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
911                 if (!ctdb_sys_have_ip(&pip->addr)) {
912                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
913                                 ctdb_addr_to_str(&pip->addr),
914                                 vnn->public_netmask_bits,
915                                 ctdb_vnn_iface_string(vnn)));
916                         ctdb_vnn_unassign_iface(ctdb, vnn);
917                         return 0;
918                 }
919         } else {
920                 if (vnn->iface == NULL) {
921                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
922                                            ctdb_addr_to_str(&pip->addr),
923                                            vnn->public_netmask_bits));
924                         return 0;
925                 }
926         }
927
928         /* There is a potential race between take_ip and us because we
929          * update the VNN via a callback that run when the
930          * eventscripts have been run.  Avoid the race by allowing one
931          * update to be in flight at a time.
932          */
933         if (vnn->update_in_flight) {
934                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
935                                     "update for this IP already in flight\n",
936                                     ctdb_addr_to_str(&vnn->public_address),
937                                     vnn->public_netmask_bits));
938                 return -1;
939         }
940
941         iface = strdup(ctdb_vnn_iface_string(vnn));
942
943         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
944                 ctdb_addr_to_str(&pip->addr),
945                 vnn->public_netmask_bits,
946                 iface,
947                 pip->pnn));
948
949         state = talloc(ctdb, struct takeover_callback_state);
950         if (state == NULL) {
951                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
952                                __FILE__, __LINE__);
953                 free(iface);
954                 return -1;
955         }
956
957         state->c = talloc_steal(state, c);
958         state->addr = talloc(state, ctdb_sock_addr);       
959         if (state->addr == NULL) {
960                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
961                                __FILE__, __LINE__);
962                 free(iface);
963                 talloc_free(state);
964                 return -1;
965         }
966         *state->addr = pip->addr;
967         state->vnn   = vnn;
968
969         vnn->update_in_flight = true;
970         talloc_set_destructor(state, ctdb_releaseip_destructor);
971
972         ret = ctdb_event_script_callback(ctdb, 
973                                          state, release_ip_callback, state,
974                                          CTDB_EVENT_RELEASE_IP,
975                                          "%s %s %u",
976                                          iface,
977                                          ctdb_addr_to_str(&pip->addr),
978                                          vnn->public_netmask_bits);
979         free(iface);
980         if (ret != 0) {
981                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
982                         ctdb_addr_to_str(&pip->addr),
983                         ctdb_vnn_iface_string(vnn)));
984                 talloc_free(state);
985                 return -1;
986         }
987
988         /* tell the control that we will be reply asynchronously */
989         *async_reply = true;
990         return 0;
991 }
992
993 static int ctdb_add_public_address(struct ctdb_context *ctdb,
994                                    ctdb_sock_addr *addr,
995                                    unsigned mask, const char *ifaces,
996                                    bool check_address)
997 {
998         struct ctdb_vnn      *vnn;
999         uint32_t num = 0;
1000         char *tmp;
1001         const char *iface;
1002         int i;
1003         int ret;
1004
1005         tmp = strdup(ifaces);
1006         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1007                 if (!ctdb_sys_check_iface_exists(iface)) {
1008                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1009                         free(tmp);
1010                         return -1;
1011                 }
1012         }
1013         free(tmp);
1014
1015         /* Verify that we don't have an entry for this ip yet */
1016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1017                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1018                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1019                                 ctdb_addr_to_str(addr)));
1020                         return -1;
1021                 }               
1022         }
1023
1024         /* create a new vnn structure for this ip address */
1025         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1026         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1027         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1028         tmp = talloc_strdup(vnn, ifaces);
1029         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1030         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1031                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1032                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1033                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1034                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1035                 num++;
1036         }
1037         talloc_free(tmp);
1038         vnn->ifaces[num] = NULL;
1039         vnn->public_address      = *addr;
1040         vnn->public_netmask_bits = mask;
1041         vnn->pnn                 = -1;
1042         if (check_address) {
1043                 if (ctdb_sys_have_ip(addr)) {
1044                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1045                         vnn->pnn = ctdb->pnn;
1046                 }
1047         }
1048
1049         for (i=0; vnn->ifaces[i]; i++) {
1050                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1051                 if (ret != 0) {
1052                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1053                                            "for public_address[%s]\n",
1054                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1055                         talloc_free(vnn);
1056                         return -1;
1057                 }
1058         }
1059
1060         DLIST_ADD(ctdb->vnn, vnn);
1061
1062         return 0;
1063 }
1064
1065 /*
1066   setup the public address lists from a file
1067 */
1068 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1069 {
1070         char **lines;
1071         int nlines;
1072         int i;
1073
1074         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1075         if (lines == NULL) {
1076                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1077                 return -1;
1078         }
1079         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1080                 nlines--;
1081         }
1082
1083         for (i=0;i<nlines;i++) {
1084                 unsigned mask;
1085                 ctdb_sock_addr addr;
1086                 const char *addrstr;
1087                 const char *ifaces;
1088                 char *tok, *line;
1089
1090                 line = lines[i];
1091                 while ((*line == ' ') || (*line == '\t')) {
1092                         line++;
1093                 }
1094                 if (*line == '#') {
1095                         continue;
1096                 }
1097                 if (strcmp(line, "") == 0) {
1098                         continue;
1099                 }
1100                 tok = strtok(line, " \t");
1101                 addrstr = tok;
1102                 tok = strtok(NULL, " \t");
1103                 if (tok == NULL) {
1104                         if (NULL == ctdb->default_public_interface) {
1105                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1106                                          i+1));
1107                                 talloc_free(lines);
1108                                 return -1;
1109                         }
1110                         ifaces = ctdb->default_public_interface;
1111                 } else {
1112                         ifaces = tok;
1113                 }
1114
1115                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1116                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1117                         talloc_free(lines);
1118                         return -1;
1119                 }
1120                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1121                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1122                         talloc_free(lines);
1123                         return -1;
1124                 }
1125         }
1126
1127
1128         talloc_free(lines);
1129         return 0;
1130 }
1131
1132 static void *add_ip_callback(void *parm, void *data)
1133 {
1134         struct public_ip_list *this_ip = parm;
1135         struct public_ip_list *prev_ip = data;
1136
1137         if (prev_ip == NULL) {
1138                 return parm;
1139         }
1140         if (this_ip->pnn == -1) {
1141                 this_ip->pnn = prev_ip->pnn;
1142         }
1143
1144         return parm;
1145 }
1146
1147 static int getips_count_callback(void *param, void *data)
1148 {
1149         struct public_ip_list **ip_list = (struct public_ip_list **)param;
1150         struct public_ip_list *new_ip = (struct public_ip_list *)data;
1151
1152         new_ip->next = *ip_list;
1153         *ip_list     = new_ip;
1154         return 0;
1155 }
1156
1157 static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
1158                                          struct ipalloc_state *ipalloc_state,
1159                                          struct ctdb_node_map_old *nodemap)
1160 {
1161         int j;
1162         int ret;
1163         struct ctdb_public_ip_list_old *ip_list;
1164
1165         if (ipalloc_state->num != nodemap->num) {
1166                 DEBUG(DEBUG_ERR,
1167                       (__location__
1168                        " ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
1169                        ipalloc_state->num, nodemap->num));
1170                 return -1;
1171         }
1172
1173         for (j=0; j<nodemap->num; j++) {
1174                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1175                         continue;
1176                 }
1177
1178                 /* Retrieve the list of known public IPs from the node */
1179                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1180                                         TAKEOVER_TIMEOUT(),
1181                                         j,
1182                                         ipalloc_state->known_public_ips,
1183                                         0,
1184                                         &ip_list);
1185                 if (ret != 0) {
1186                         DEBUG(DEBUG_ERR,
1187                               ("Failed to read known public IPs from node: %u\n",
1188                                j));
1189                         return -1;
1190                 }
1191                 ipalloc_state->known_public_ips[j].num = ip_list->num;
1192                 /* This could be copied and freed.  However, ip_list
1193                  * is allocated off ipalloc_state->known_public_ips,
1194                  * so this is a safe hack.  This will go away in a
1195                  * while anyway... */
1196                 ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
1197
1198                 /* Retrieve the list of available public IPs from the node */
1199                 ret = ctdb_ctrl_get_public_ips_flags(ctdb,
1200                                         TAKEOVER_TIMEOUT(),
1201                                         j,
1202                                         ipalloc_state->available_public_ips,
1203                                         CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
1204                                         &ip_list);
1205                 if (ret != 0) {
1206                         DEBUG(DEBUG_ERR,
1207                               ("Failed to read available public IPs from node: %u\n",
1208                                j));
1209                         return -1;
1210                 }
1211                 ipalloc_state->available_public_ips[j].num = ip_list->num;
1212                 /* This could be copied and freed.  However, ip_list
1213                  * is allocated off ipalloc_state->available_public_ips,
1214                  * so this is a safe hack.  This will go away in a
1215                  * while anyway... */
1216                 ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
1217         }
1218
1219         return 0;
1220 }
1221
1222 static struct public_ip_list *
1223 create_merged_ip_list(struct ipalloc_state *ipalloc_state)
1224 {
1225         int i, j;
1226         struct public_ip_list *ip_list;
1227         struct ctdb_public_ip_list *public_ips;
1228         struct trbt_tree *ip_tree;
1229
1230         ip_tree = trbt_create(ipalloc_state, 0);
1231
1232         if (ipalloc_state->known_public_ips == NULL) {
1233                 DEBUG(DEBUG_ERR, ("Known public IPs not set\n"));
1234                 return NULL;
1235         }
1236
1237         for (i=0; i < ipalloc_state->num; i++) {
1238
1239                 public_ips = &ipalloc_state->known_public_ips[i];
1240
1241                 for (j=0; j < public_ips->num; j++) {
1242                         struct public_ip_list *tmp_ip;
1243
1244                         /* This is returned as part of ip_list */
1245                         tmp_ip = talloc_zero(ipalloc_state, struct public_ip_list);
1246                         if (tmp_ip == NULL) {
1247                                 DEBUG(DEBUG_ERR,
1248                                       (__location__ " out of memory\n"));
1249                                 talloc_free(ip_tree);
1250                                 return NULL;
1251                         }
1252
1253                         /* Do not use information about IP addresses hosted
1254                          * on other nodes, it may not be accurate */
1255                         if (public_ips->ip[j].pnn == i) {
1256                                 tmp_ip->pnn = public_ips->ip[j].pnn;
1257                         } else {
1258                                 tmp_ip->pnn = -1;
1259                         }
1260                         tmp_ip->addr = public_ips->ip[j].addr;
1261                         tmp_ip->next = NULL;
1262
1263                         trbt_insertarray32_callback(ip_tree,
1264                                 IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
1265                                 add_ip_callback,
1266                                 tmp_ip);
1267                 }
1268         }
1269
1270         ip_list = NULL;
1271         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1272         talloc_free(ip_tree);
1273
1274         return ip_list;
1275 }
1276
1277 static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
1278 {
1279         int i;
1280
1281         for (i=0;i<nodemap->num;i++) {
1282                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1283                         /* Found one completely healthy node */
1284                         return false;
1285                 }
1286         }
1287
1288         return true;
1289 }
1290
/* State shared between get_tunable_from_nodes() and its callbacks */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched, used in log messages */
        const char *tunable;
        /* talloc array of per-node values, indexed by PNN */
        uint32_t *out;
        /* Set by the callbacks when the whole fetch must be failed */
        bool fatal;
};
1296
1297 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1298                                  int32_t res, TDB_DATA outdata,
1299                                  void *callback)
1300 {
1301         struct get_tunable_callback_data *cd =
1302                 (struct get_tunable_callback_data *)callback;
1303         int size;
1304
1305         if (res != 0) {
1306                 /* Already handled in fail callback */
1307                 return;
1308         }
1309
1310         if (outdata.dsize != sizeof(uint32_t)) {
1311                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1312                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1313                                  (int)outdata.dsize));
1314                 cd->fatal = true;
1315                 return;
1316         }
1317
1318         size = talloc_array_length(cd->out);
1319         if (pnn >= size) {
1320                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1321                                  cd->tunable, pnn, size));
1322                 return;
1323         }
1324
1325                 
1326         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1327 }
1328
1329 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1330                                        int32_t res, TDB_DATA outdata,
1331                                        void *callback)
1332 {
1333         struct get_tunable_callback_data *cd =
1334                 (struct get_tunable_callback_data *)callback;
1335
1336         switch (res) {
1337         case -ETIME:
1338                 DEBUG(DEBUG_ERR,
1339                       ("Timed out getting tunable \"%s\" from node %d\n",
1340                        cd->tunable, pnn));
1341                 cd->fatal = true;
1342                 break;
1343         case -EINVAL:
1344         case -1:
1345                 DEBUG(DEBUG_WARNING,
1346                       ("Tunable \"%s\" not implemented on node %d\n",
1347                        cd->tunable, pnn));
1348                 break;
1349         default:
1350                 DEBUG(DEBUG_ERR,
1351                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1352                        cd->tunable, pnn));
1353                 cd->fatal = true;
1354         }
1355 }
1356
1357 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1358                                         TALLOC_CTX *tmp_ctx,
1359                                         struct ctdb_node_map_old *nodemap,
1360                                         const char *tunable,
1361                                         uint32_t default_value)
1362 {
1363         TDB_DATA data;
1364         struct ctdb_control_get_tunable *t;
1365         uint32_t *nodes;
1366         uint32_t *tvals;
1367         struct get_tunable_callback_data callback_data;
1368         int i;
1369
1370         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1371         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1372         for (i=0; i<nodemap->num; i++) {
1373                 tvals[i] = default_value;
1374         }
1375                 
1376         callback_data.out = tvals;
1377         callback_data.tunable = tunable;
1378         callback_data.fatal = false;
1379
1380         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1381         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1382         t = (struct ctdb_control_get_tunable *)data.dptr;
1383         t->length = strlen(tunable)+1;
1384         memcpy(t->name, tunable, t->length);
1385         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1386         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1387                                       nodes, 0, TAKEOVER_TIMEOUT(),
1388                                       false, data,
1389                                       get_tunable_callback,
1390                                       get_tunable_fail_callback,
1391                                       &callback_data) != 0) {
1392                 if (callback_data.fatal) {
1393                         talloc_free(tvals);
1394                         tvals = NULL;
1395                 }
1396         }
1397         talloc_free(nodes);
1398         talloc_free(data.dptr);
1399
1400         return tvals;
1401 }
1402
1403 /* Set internal flags for IP allocation:
1404  *   Clear ip flags
1405  *   Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
1406  *   Set NOIPHOST ip flag for each INACTIVE node
1407  *   if all nodes are disabled:
1408  *     Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
1409  *   else
1410  *     Set NOIPHOST ip flags for disabled nodes
1411  */
1412 static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
1413                                  struct ctdb_node_map_old *nodemap,
1414                                  uint32_t *tval_noiptakeover,
1415                                  uint32_t *tval_noiphostonalldisabled)
1416 {
1417         int i;
1418
1419         for (i=0;i<nodemap->num;i++) {
1420                 /* Can not take IPs on node with NoIPTakeover set */
1421                 if (tval_noiptakeover[i] != 0) {
1422                         ipalloc_state->noiptakeover[i] = true;
1423                 }
1424
1425                 /* Can not host IPs on INACTIVE node */
1426                 if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
1427                         ipalloc_state->noiphost[i] = true;
1428                 }
1429         }
1430
1431         if (all_nodes_are_disabled(nodemap)) {
1432                 /* If all nodes are disabled, can not host IPs on node
1433                  * with NoIPHostOnAllDisabled set
1434                  */
1435                 for (i=0;i<nodemap->num;i++) {
1436                         if (tval_noiphostonalldisabled[i] != 0) {
1437                                 ipalloc_state->noiphost[i] = true;
1438                         }
1439                 }
1440         } else {
1441                 /* If some nodes are not disabled, then can not host
1442                  * IPs on DISABLED node
1443                  */
1444                 for (i=0;i<nodemap->num;i++) {
1445                         if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
1446                                 ipalloc_state->noiphost[i] = true;
1447                         }
1448                 }
1449         }
1450 }
1451
1452 static bool set_ipflags(struct ctdb_context *ctdb,
1453                         struct ipalloc_state *ipalloc_state,
1454                         struct ctdb_node_map_old *nodemap)
1455 {
1456         uint32_t *tval_noiptakeover;
1457         uint32_t *tval_noiphostonalldisabled;
1458
1459         tval_noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1460                                                    "NoIPTakeover", 0);
1461         if (tval_noiptakeover == NULL) {
1462                 return false;
1463         }
1464
1465         tval_noiphostonalldisabled =
1466                 get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
1467                                        "NoIPHostOnAllDisabled", 0);
1468         if (tval_noiphostonalldisabled == NULL) {
1469                 /* Caller frees tmp_ctx */
1470                 return false;
1471         }
1472
1473         set_ipflags_internal(ipalloc_state, nodemap,
1474                              tval_noiptakeover,
1475                              tval_noiphostonalldisabled);
1476
1477         talloc_free(tval_noiptakeover);
1478         talloc_free(tval_noiphostonalldisabled);
1479
1480         return true;
1481 }
1482
1483 static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
1484                                                  TALLOC_CTX *mem_ctx)
1485 {
1486         struct ipalloc_state *ipalloc_state =
1487                 talloc_zero(mem_ctx, struct ipalloc_state);
1488         if (ipalloc_state == NULL) {
1489                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1490                 return NULL;
1491         }
1492
1493         ipalloc_state->num = ctdb->num_nodes;
1494
1495         ipalloc_state->known_public_ips =
1496                 talloc_zero_array(ipalloc_state,
1497                                   struct ctdb_public_ip_list,
1498                                   ipalloc_state->num);
1499         if (ipalloc_state->known_public_ips == NULL) {
1500                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1501                 goto fail;
1502         }
1503
1504         ipalloc_state->available_public_ips =
1505                 talloc_zero_array(ipalloc_state,
1506                                   struct ctdb_public_ip_list,
1507                                   ipalloc_state->num);
1508         if (ipalloc_state->available_public_ips == NULL) {
1509                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1510                 goto fail;
1511         }
1512         ipalloc_state->noiptakeover =
1513                 talloc_zero_array(ipalloc_state,
1514                                   bool,
1515                                   ipalloc_state->num);
1516         if (ipalloc_state->noiptakeover == NULL) {
1517                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1518                 goto fail;
1519         }
1520         ipalloc_state->noiphost =
1521                 talloc_zero_array(ipalloc_state,
1522                                   bool,
1523                                   ipalloc_state->num);
1524         if (ipalloc_state->noiphost == NULL) {
1525                 DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
1526                 goto fail;
1527         }
1528
1529         if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
1530                 ipalloc_state->algorithm = IPALLOC_LCP2;
1531         } else if (1 == ctdb->tunable.deterministic_public_ips) {
1532                 ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
1533         } else {
1534                 ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
1535         }
1536
1537         ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
1538
1539         return ipalloc_state;
1540 fail:
1541         talloc_free(ipalloc_state);
1542         return NULL;
1543 }
1544
/* Per-node failure bookkeeping for a takeover run */
struct takeover_callback_data {
        /* Number of entries in fail_count */
        uint32_t num_nodes;
        /* Failure count for each node, indexed by PNN */
        unsigned int *fail_count;
};
1549
1550 static struct takeover_callback_data *
1551 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1552                             uint32_t num_nodes)
1553 {
1554         static struct takeover_callback_data *takeover_data;
1555
1556         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1557         if (takeover_data == NULL) {
1558                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1559                 return NULL;
1560         }
1561
1562         takeover_data->fail_count = talloc_zero_array(takeover_data,
1563                                                       unsigned int, num_nodes);
1564         if (takeover_data->fail_count == NULL) {
1565                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1566                 talloc_free(takeover_data);
1567                 return NULL;
1568         }
1569
1570         takeover_data->num_nodes = num_nodes;
1571
1572         return takeover_data;
1573 }
1574
1575 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1576                                        uint32_t node_pnn, int32_t res,
1577                                        TDB_DATA outdata, void *callback_data)
1578 {
1579         struct takeover_callback_data *cd =
1580                 talloc_get_type_abort(callback_data,
1581                                       struct takeover_callback_data);
1582
1583         if (node_pnn >= cd->num_nodes) {
1584                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1585                 return;
1586         }
1587
1588         if (cd->fail_count[node_pnn] == 0) {
1589                 DEBUG(DEBUG_ERR,
1590                       ("Node %u failed the takeover run\n", node_pnn));
1591         }
1592
1593         cd->fail_count[node_pnn]++;
1594 }
1595
1596 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1597                                           struct takeover_callback_data *tcd)
1598 {
1599         unsigned int max_fails = 0;
1600         uint32_t max_pnn = -1;
1601         uint32_t i;
1602
1603         for (i = 0; i < tcd->num_nodes; i++) {
1604                 if (tcd->fail_count[i] > max_fails) {
1605                         max_pnn = i;
1606                         max_fails = tcd->fail_count[i];
1607                 }
1608         }
1609
1610         if (max_fails > 0) {
1611                 int ret;
1612                 TDB_DATA data;
1613
1614                 DEBUG(DEBUG_ERR,
1615                       ("Sending banning credits to %u with fail count %u\n",
1616                        max_pnn, max_fails));
1617
1618                 data.dptr = (uint8_t *)&max_pnn;
1619                 data.dsize = sizeof(uint32_t);
1620                 ret = ctdb_client_send_message(ctdb,
1621                                                CTDB_BROADCAST_CONNECTED,
1622                                                CTDB_SRVID_BANNING,
1623                                                data);
1624                 if (ret != 0) {
1625                         DEBUG(DEBUG_ERR,
1626                               ("Failed to set banning credits for node %u\n",
1627                                max_pnn));
1628                 }
1629         }
1630 }
1631
1632 /*
1633  * Recalculate the allocation of public IPs to nodes and have the
1634  * nodes host their allocated addresses.
1635  *
1636  * - Allocate memory for IP allocation state, including per node
1637  *   arrays
1638  * - Populate IP allocation algorithm in IP allocation state
1639  * - Populate local value of tunable NoIPFailback in IP allocation
1640      state - this is really a cluster-wide configuration variable and
1641      only the value form the master node is used
1642  * - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
1643  *   connected nodes - this is done separately so tunable values can
1644  *   be faked in unit testing
1645  * - Populate NoIPTakover tunable in IP allocation state
1646  * - Populate NoIPHost in IP allocation state, derived from node flags
1647  *   and NoIPHostOnAllDisabled tunable
1648  * - Retrieve and populate known and available IP lists in IP
1649  *   allocation state
1650  * - If no available IP addresses then early exit
1651  * - Build list of (known IPs, currently assigned node)
1652  * - Populate list of nodes to force rebalance - internal structure,
1653  *   currently no way to fetch, only used by LCP2 for nodes that have
1654  *   had new IP addresses added
1655  * - Run IP allocation algorithm
1656  * - Send RELEASE_IP to all nodes for IPs they should not host
1657  * - Send TAKE_IP to all nodes for IPs they should host
1658  * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
1659  */
1660 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1661                       uint32_t *force_rebalance_nodes)
1662 {
1663         int i, ret;
1664         struct ctdb_public_ip ip;
1665         uint32_t *nodes;
1666         struct public_ip_list *all_ips, *tmp_ip;
1667         TDB_DATA data;
1668         struct timeval timeout;
1669         struct client_async_data *async_data;
1670         struct ctdb_client_control_state *state;
1671         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1672         struct ipalloc_state *ipalloc_state;
1673         struct takeover_callback_data *takeover_data;
1674         bool can_host_ips;
1675
1676         /* Initialise fail callback data to be used with
1677          * takeover_run_fail_callback().  A failure in any of the
1678          * following steps will cause an early return, so this can be
1679          * reused for each of those steps without re-initialising. */
1680         takeover_data = takeover_callback_data_init(tmp_ctx,
1681                                                     nodemap->num);
1682         if (takeover_data == NULL) {
1683                 talloc_free(tmp_ctx);
1684                 return -1;
1685         }
1686
1687         /*
1688          * ip failover is completely disabled, just send out the 
1689          * ipreallocated event.
1690          */
1691         if (ctdb->tunable.disable_ip_failover != 0) {
1692                 goto ipreallocated;
1693         }
1694
1695         ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
1696         if (ipalloc_state == NULL) {
1697                 talloc_free(tmp_ctx);
1698                 return -1;
1699         }
1700
1701         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1702                 DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
1703                 talloc_free(tmp_ctx);
1704                 return -1;
1705         }
1706
1707         /* Fetch known/available public IPs from each active node */
1708         ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
1709         if (ret != 0) {
1710                 talloc_free(tmp_ctx);
1711                 return -1;
1712         }
1713
1714         /* Short-circuit IP allocation if no node has available IPs */
1715         can_host_ips = false;
1716         for (i=0; i < ipalloc_state->num; i++) {
1717                 if (ipalloc_state->available_public_ips[i].num != 0) {
1718                         can_host_ips = true;
1719                 }
1720         }
1721         if (!can_host_ips) {
1722                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1723                 goto ipreallocated;
1724         }
1725
1726         /* since nodes only know about those public addresses that
1727            can be served by that particular node, no single node has
1728            a full list of all public addresses that exist in the cluster.
1729            Walk over all node structures and create a merged list of
1730            all public addresses that exist in the cluster.
1731         */
1732         all_ips = create_merged_ip_list(ipalloc_state);
1733         if (all_ips == NULL) {
1734                 talloc_free(tmp_ctx);
1735                 return -1;
1736         }
1737         ipalloc_state->all_ips = all_ips;
1738
1739         ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
1740
1741         /* Do the IP reassignment calculations */
1742         ipalloc(ipalloc_state);
1743
1744         /* Now tell all nodes to release any public IPs should not
1745          * host.  This will be a NOOP on nodes that don't currently
1746          * hold the given IP.
1747          */
1748         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1749         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1750
1751         async_data->fail_callback = takeover_run_fail_callback;
1752         async_data->callback_data = takeover_data;
1753
1754         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1755
1756         /* Send a RELEASE_IP to all nodes that should not be hosting
1757          * each IP.  For each IP, all but one of these will be
1758          * redundant.  However, the redundant ones are used to tell
1759          * nodes which node should be hosting the IP so that commands
1760          * like "ctdb ip" can display a particular nodes idea of who
1761          * is hosting what. */
1762         for (i=0;i<nodemap->num;i++) {
1763                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1764                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1765                         continue;
1766                 }
1767
1768                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1769                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1770                                 /* This node should be serving this
1771                                    vnn so don't tell it to release the ip
1772                                 */
1773                                 continue;
1774                         }
1775                         ip.pnn  = tmp_ip->pnn;
1776                         ip.addr = tmp_ip->addr;
1777
1778                         timeout = TAKEOVER_TIMEOUT();
1779                         data.dsize = sizeof(ip);
1780                         data.dptr  = (uint8_t *)&ip;
1781                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1782                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1783                                                   data, async_data,
1784                                                   &timeout, NULL);
1785                         if (state == NULL) {
1786                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1787                                 talloc_free(tmp_ctx);
1788                                 return -1;
1789                         }
1790
1791                         ctdb_client_async_add(async_data, state);
1792                 }
1793         }
1794         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1795                 DEBUG(DEBUG_ERR,
1796                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1797                 goto fail;
1798         }
1799         talloc_free(async_data);
1800
1801
1802         /* For each IP, send a TAKOVER_IP to the node that should be
1803          * hosting it.  Many of these will often be redundant (since
1804          * the allocation won't have changed) but they can be useful
1805          * to recover from inconsistencies. */
1806         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1807         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1808
1809         async_data->fail_callback = takeover_run_fail_callback;
1810         async_data->callback_data = takeover_data;
1811
1812         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1813                 if (tmp_ip->pnn == -1) {
1814                         /* this IP won't be taken over */
1815                         continue;
1816                 }
1817
1818                 ip.pnn  = tmp_ip->pnn;
1819                 ip.addr = tmp_ip->addr;
1820
1821                 timeout = TAKEOVER_TIMEOUT();
1822                 data.dsize = sizeof(ip);
1823                 data.dptr  = (uint8_t *)&ip;
1824                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1825                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1826                                           data, async_data, &timeout, NULL);
1827                 if (state == NULL) {
1828                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1829                         talloc_free(tmp_ctx);
1830                         return -1;
1831                 }
1832
1833                 ctdb_client_async_add(async_data, state);
1834         }
1835         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1836                 DEBUG(DEBUG_ERR,
1837                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1838                 goto fail;
1839         }
1840
1841 ipreallocated:
1842         /*
1843          * Tell all nodes to run eventscripts to process the
1844          * "ipreallocated" event.  This can do a lot of things,
1845          * including restarting services to reconfigure them if public
1846          * IPs have moved.  Once upon a time this event only used to
1847          * update natgw.
1848          */
1849         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1850         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1851                                         nodes, 0, TAKEOVER_TIMEOUT(),
1852                                         false, tdb_null,
1853                                         NULL, takeover_run_fail_callback,
1854                                         takeover_data);
1855         if (ret != 0) {
1856                 DEBUG(DEBUG_ERR,
1857                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1858                 goto fail;
1859         }
1860
1861         talloc_free(tmp_ctx);
1862         return ret;
1863
1864 fail:
1865         takeover_run_process_failures(ctdb, takeover_data);
1866         talloc_free(tmp_ctx);
1867         return -1;
1868 }
1869
1870
1871 /*
1872   destroy a ctdb_client_ip structure
1873  */
1874 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1875 {
1876         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1877                 ctdb_addr_to_str(&ip->addr),
1878                 ntohs(ip->addr.ip.sin_port),
1879                 ip->client_id));
1880
1881         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1882         return 0;
1883 }
1884
1885 /*
1886   called by a client to inform us of a TCP connection that it is managing
1887   that should tickled with an ACK when IP takeover is done
1888  */
1889 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1890                                 TDB_DATA indata)
1891 {
1892         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1893         struct ctdb_connection *tcp_sock = NULL;
1894         struct ctdb_tcp_list *tcp;
1895         struct ctdb_connection t;
1896         int ret;
1897         TDB_DATA data;
1898         struct ctdb_client_ip *ip;
1899         struct ctdb_vnn *vnn;
1900         ctdb_sock_addr addr;
1901
1902         /* If we don't have public IPs, tickles are useless */
1903         if (ctdb->vnn == NULL) {
1904                 return 0;
1905         }
1906
1907         tcp_sock = (struct ctdb_connection *)indata.dptr;
1908
1909         addr = tcp_sock->src;
1910         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1911         addr = tcp_sock->dst;
1912         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1913
1914         ZERO_STRUCT(addr);
1915         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1916         vnn = find_public_ip_vnn(ctdb, &addr);
1917         if (vnn == NULL) {
1918                 switch (addr.sa.sa_family) {
1919                 case AF_INET:
1920                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1921                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1922                                         ctdb_addr_to_str(&addr)));
1923                         }
1924                         break;
1925                 case AF_INET6:
1926                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1927                                 ctdb_addr_to_str(&addr)));
1928                         break;
1929                 default:
1930                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1931                 }
1932
1933                 return 0;
1934         }
1935
1936         if (vnn->pnn != ctdb->pnn) {
1937                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1938                         ctdb_addr_to_str(&addr),
1939                         client_id, client->pid));
1940                 /* failing this call will tell smbd to die */
1941                 return -1;
1942         }
1943
1944         ip = talloc(client, struct ctdb_client_ip);
1945         CTDB_NO_MEMORY(ctdb, ip);
1946
1947         ip->ctdb      = ctdb;
1948         ip->addr      = addr;
1949         ip->client_id = client_id;
1950         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1951         DLIST_ADD(ctdb->client_ip_list, ip);
1952
1953         tcp = talloc(client, struct ctdb_tcp_list);
1954         CTDB_NO_MEMORY(ctdb, tcp);
1955
1956         tcp->connection.src = tcp_sock->src;
1957         tcp->connection.dst = tcp_sock->dst;
1958
1959         DLIST_ADD(client->tcp_list, tcp);
1960
1961         t.src = tcp_sock->src;
1962         t.dst = tcp_sock->dst;
1963
1964         data.dptr = (uint8_t *)&t;
1965         data.dsize = sizeof(t);
1966
1967         switch (addr.sa.sa_family) {
1968         case AF_INET:
1969                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1970                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1971                         ctdb_addr_to_str(&tcp_sock->src),
1972                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1973                 break;
1974         case AF_INET6:
1975                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1976                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1977                         ctdb_addr_to_str(&tcp_sock->src),
1978                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1979                 break;
1980         default:
1981                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1982         }
1983
1984
1985         /* tell all nodes about this tcp connection */
1986         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1987                                        CTDB_CONTROL_TCP_ADD,
1988                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1989         if (ret != 0) {
1990                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1991                 return -1;
1992         }
1993
1994         return 0;
1995 }
1996
1997 /*
1998   find a tcp address on a list
1999  */
2000 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
2001                                            struct ctdb_connection *tcp)
2002 {
2003         int i;
2004
2005         if (array == NULL) {
2006                 return NULL;
2007         }
2008
2009         for (i=0;i<array->num;i++) {
2010                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
2011                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
2012                         return &array->connections[i];
2013                 }
2014         }
2015         return NULL;
2016 }
2017
2018
2019
2020 /*
2021   called by a daemon to inform us of a TCP connection that one of its
2022   clients managing that should tickled with an ACK when IP takeover is
2023   done
2024  */
2025 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
2026 {
2027         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
2028         struct ctdb_tcp_array *tcparray;
2029         struct ctdb_connection tcp;
2030         struct ctdb_vnn *vnn;
2031
2032         /* If we don't have public IPs, tickles are useless */
2033         if (ctdb->vnn == NULL) {
2034                 return 0;
2035         }
2036
2037         vnn = find_public_ip_vnn(ctdb, &p->dst);
2038         if (vnn == NULL) {
2039                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
2040                         ctdb_addr_to_str(&p->dst)));
2041
2042                 return -1;
2043         }
2044
2045
2046         tcparray = vnn->tcp_array;
2047
2048         /* If this is the first tickle */
2049         if (tcparray == NULL) {
2050                 tcparray = talloc(vnn, struct ctdb_tcp_array);
2051                 CTDB_NO_MEMORY(ctdb, tcparray);
2052                 vnn->tcp_array = tcparray;
2053
2054                 tcparray->num = 0;
2055                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
2056                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
2057
2058                 tcparray->connections[tcparray->num].src = p->src;
2059                 tcparray->connections[tcparray->num].dst = p->dst;
2060                 tcparray->num++;
2061
2062                 if (tcp_update_needed) {
2063                         vnn->tcp_update_needed = true;
2064                 }
2065                 return 0;
2066         }
2067
2068
2069         /* Do we already have this tickle ?*/
2070         tcp.src = p->src;
2071         tcp.dst = p->dst;
2072         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
2073                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
2074                         ctdb_addr_to_str(&tcp.dst),
2075                         ntohs(tcp.dst.ip.sin_port),
2076                         vnn->pnn));
2077                 return 0;
2078         }
2079
2080         /* A new tickle, we must add it to the array */
2081         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
2082                                         struct ctdb_connection,
2083                                         tcparray->num+1);
2084         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2085
2086         tcparray->connections[tcparray->num].src = p->src;
2087         tcparray->connections[tcparray->num].dst = p->dst;
2088         tcparray->num++;
2089
2090         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
2091                 ctdb_addr_to_str(&tcp.dst),
2092                 ntohs(tcp.dst.ip.sin_port),
2093                 vnn->pnn));
2094
2095         if (tcp_update_needed) {
2096                 vnn->tcp_update_needed = true;
2097         }
2098
2099         return 0;
2100 }
2101
2102
2103 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
2104 {
2105         struct ctdb_connection *tcpp;
2106
2107         if (vnn == NULL) {
2108                 return;
2109         }
2110
2111         /* if the array is empty we cant remove it
2112            and we don't need to do anything
2113          */
2114         if (vnn->tcp_array == NULL) {
2115                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
2116                         ctdb_addr_to_str(&conn->dst),
2117                         ntohs(conn->dst.ip.sin_port)));
2118                 return;
2119         }
2120
2121
2122         /* See if we know this connection
2123            if we don't know this connection  then we dont need to do anything
2124          */
2125         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2126         if (tcpp == NULL) {
2127                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2128                         ctdb_addr_to_str(&conn->dst),
2129                         ntohs(conn->dst.ip.sin_port)));
2130                 return;
2131         }
2132
2133
2134         /* We need to remove this entry from the array.
2135            Instead of allocating a new array and copying data to it
2136            we cheat and just copy the last entry in the existing array
2137            to the entry that is to be removed and just shring the 
2138            ->num field
2139          */
2140         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2141         vnn->tcp_array->num--;
2142
2143         /* If we deleted the last entry we also need to remove the entire array
2144          */
2145         if (vnn->tcp_array->num == 0) {
2146                 talloc_free(vnn->tcp_array);
2147                 vnn->tcp_array = NULL;
2148         }               
2149
2150         vnn->tcp_update_needed = true;
2151
2152         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2153                 ctdb_addr_to_str(&conn->src),
2154                 ntohs(conn->src.ip.sin_port)));
2155 }
2156
2157
2158 /*
2159   called by a daemon to inform us of a TCP connection that one of its
2160   clients used are no longer needed in the tickle database
2161  */
2162 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2163 {
2164         struct ctdb_vnn *vnn;
2165         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2166
2167         /* If we don't have public IPs, tickles are useless */
2168         if (ctdb->vnn == NULL) {
2169                 return 0;
2170         }
2171
2172         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2173         if (vnn == NULL) {
2174                 DEBUG(DEBUG_ERR,
2175                       (__location__ " unable to find public address %s\n",
2176                        ctdb_addr_to_str(&conn->dst)));
2177                 return 0;
2178         }
2179
2180         ctdb_remove_connection(vnn, conn);
2181
2182         return 0;
2183 }
2184
2185
2186 /*
2187   Called when another daemon starts - causes all tickles for all
2188   public addresses we are serving to be sent to the new node on the
2189   next check.  This actually causes the next scheduled call to
2190   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2191   doesn't require careful error handling.
2192  */
2193 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2194 {
2195         struct ctdb_vnn *vnn;
2196
2197         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2198                            (unsigned long) pnn));
2199
2200         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2201                 vnn->tcp_update_needed = true;
2202         }
2203
2204         return 0;
2205 }
2206
2207
2208 /*
2209   called when a client structure goes away - hook to remove
2210   elements from the tcp_list in all daemons
2211  */
2212 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2213 {
2214         while (client->tcp_list) {
2215                 struct ctdb_vnn *vnn;
2216                 struct ctdb_tcp_list *tcp = client->tcp_list;
2217                 struct ctdb_connection *conn = &tcp->connection;
2218
2219                 DLIST_REMOVE(client->tcp_list, tcp);
2220
2221                 vnn = find_public_ip_vnn(client->ctdb,
2222                                          &conn->dst);
2223                 if (vnn == NULL) {
2224                         DEBUG(DEBUG_ERR,
2225                               (__location__ " unable to find public address %s\n",
2226                                ctdb_addr_to_str(&conn->dst)));
2227                         continue;
2228                 }
2229
2230                 /* If the IP address is hosted on this node then
2231                  * remove the connection. */
2232                 if (vnn->pnn == client->ctdb->pnn) {
2233                         ctdb_remove_connection(vnn, conn);
2234                 }
2235
2236                 /* Otherwise this function has been called because the
2237                  * server IP address has been released to another node
2238                  * and the client has exited.  This means that we
2239                  * should not delete the connection information.  The
2240                  * takeover node processes connections too. */
2241         }
2242 }
2243
2244
2245 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2246 {
2247         struct ctdb_vnn *vnn;
2248         int count = 0;
2249         TDB_DATA data;
2250
2251         if (ctdb->tunable.disable_ip_failover == 1) {
2252                 return;
2253         }
2254
2255         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2256                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2257                         ctdb_vnn_unassign_iface(ctdb, vnn);
2258                         continue;
2259                 }
2260                 if (!vnn->iface) {
2261                         continue;
2262                 }
2263
2264                 /* Don't allow multiple releases at once.  Some code,
2265                  * particularly ctdb_tickle_sentenced_connections() is
2266                  * not re-entrant */
2267                 if (vnn->update_in_flight) {
2268                         DEBUG(DEBUG_WARNING,
2269                               (__location__
2270                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2271                                     ctdb_addr_to_str(&vnn->public_address),
2272                                     vnn->public_netmask_bits,
2273                                     ctdb_vnn_iface_string(vnn)));
2274                         continue;
2275                 }
2276                 vnn->update_in_flight = true;
2277
2278                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2279                                     ctdb_addr_to_str(&vnn->public_address),
2280                                     vnn->public_netmask_bits,
2281                                     ctdb_vnn_iface_string(vnn)));
2282
2283                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2284                                   ctdb_vnn_iface_string(vnn),
2285                                   ctdb_addr_to_str(&vnn->public_address),
2286                                   vnn->public_netmask_bits);
2287
2288                 data.dptr = (uint8_t *)talloc_strdup(
2289                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2290                 if (data.dptr != NULL) {
2291                         data.dsize = strlen((char *)data.dptr) + 1;
2292                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2293                                                  CTDB_SRVID_RELEASE_IP, data);
2294                         talloc_free(data.dptr);
2295                 }
2296
2297                 ctdb_vnn_unassign_iface(ctdb, vnn);
2298                 vnn->update_in_flight = false;
2299                 count++;
2300         }
2301
2302         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2303 }
2304
2305
2306 /*
2307   get list of public IPs
2308  */
2309 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2310                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2311 {
2312         int i, num, len;
2313         struct ctdb_public_ip_list_old *ips;
2314         struct ctdb_vnn *vnn;
2315         bool only_available = false;
2316
2317         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2318                 only_available = true;
2319         }
2320
2321         /* count how many public ip structures we have */
2322         num = 0;
2323         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2324                 num++;
2325         }
2326
2327         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2328                 num*sizeof(struct ctdb_public_ip);
2329         ips = talloc_zero_size(outdata, len);
2330         CTDB_NO_MEMORY(ctdb, ips);
2331
2332         i = 0;
2333         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2334                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2335                         continue;
2336                 }
2337                 ips->ips[i].pnn  = vnn->pnn;
2338                 ips->ips[i].addr = vnn->public_address;
2339                 i++;
2340         }
2341         ips->num = i;
2342         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2343                 i*sizeof(struct ctdb_public_ip);
2344
2345         outdata->dsize = len;
2346         outdata->dptr  = (uint8_t *)ips;
2347
2348         return 0;
2349 }
2350
2351
2352 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2353                                         struct ctdb_req_control_old *c,
2354                                         TDB_DATA indata,
2355                                         TDB_DATA *outdata)
2356 {
2357         int i, num, len;
2358         ctdb_sock_addr *addr;
2359         struct ctdb_public_ip_info_old *info;
2360         struct ctdb_vnn *vnn;
2361
2362         addr = (ctdb_sock_addr *)indata.dptr;
2363
2364         vnn = find_public_ip_vnn(ctdb, addr);
2365         if (vnn == NULL) {
2366                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2367                                  "'%s'not a public address\n",
2368                                  ctdb_addr_to_str(addr)));
2369                 return -1;
2370         }
2371
2372         /* count how many public ip structures we have */
2373         num = 0;
2374         for (;vnn->ifaces[num];) {
2375                 num++;
2376         }
2377
2378         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2379                 num*sizeof(struct ctdb_iface);
2380         info = talloc_zero_size(outdata, len);
2381         CTDB_NO_MEMORY(ctdb, info);
2382
2383         info->ip.addr = vnn->public_address;
2384         info->ip.pnn = vnn->pnn;
2385         info->active_idx = 0xFFFFFFFF;
2386
2387         for (i=0; vnn->ifaces[i]; i++) {
2388                 struct ctdb_interface *cur;
2389
2390                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2391                 if (cur == NULL) {
2392                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2393                                            vnn->ifaces[i]));
2394                         return -1;
2395                 }
2396                 if (vnn->iface == cur) {
2397                         info->active_idx = i;
2398                 }
2399                 strncpy(info->ifaces[i].name, cur->name,
2400                         sizeof(info->ifaces[i].name));
2401                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2402                 info->ifaces[i].link_state = cur->link_up;
2403                 info->ifaces[i].references = cur->references;
2404         }
2405         info->num = i;
2406         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2407                 i*sizeof(struct ctdb_iface);
2408
2409         outdata->dsize = len;
2410         outdata->dptr  = (uint8_t *)info;
2411
2412         return 0;
2413 }
2414
2415 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2416                                 struct ctdb_req_control_old *c,
2417                                 TDB_DATA *outdata)
2418 {
2419         int i, num, len;
2420         struct ctdb_iface_list_old *ifaces;
2421         struct ctdb_interface *cur;
2422
2423         /* count how many public ip structures we have */
2424         num = 0;
2425         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2426                 num++;
2427         }
2428
2429         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2430                 num*sizeof(struct ctdb_iface);
2431         ifaces = talloc_zero_size(outdata, len);
2432         CTDB_NO_MEMORY(ctdb, ifaces);
2433
2434         i = 0;
2435         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2436                 strncpy(ifaces->ifaces[i].name, cur->name,
2437                         sizeof(ifaces->ifaces[i].name));
2438                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2439                 ifaces->ifaces[i].link_state = cur->link_up;
2440                 ifaces->ifaces[i].references = cur->references;
2441                 i++;
2442         }
2443         ifaces->num = i;
2444         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2445                 i*sizeof(struct ctdb_iface);
2446
2447         outdata->dsize = len;
2448         outdata->dptr  = (uint8_t *)ifaces;
2449
2450         return 0;
2451 }
2452
2453 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2454                                     struct ctdb_req_control_old *c,
2455                                     TDB_DATA indata)
2456 {
2457         struct ctdb_iface *info;
2458         struct ctdb_interface *iface;
2459         bool link_up = false;
2460
2461         info = (struct ctdb_iface *)indata.dptr;
2462
2463         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2464                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2465                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2466                                   len, len, info->name));
2467                 return -1;
2468         }
2469
2470         switch (info->link_state) {
2471         case 0:
2472                 link_up = false;
2473                 break;
2474         case 1:
2475                 link_up = true;
2476                 break;
2477         default:
2478                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2479                                   (unsigned int)info->link_state));
2480                 return -1;
2481         }
2482
2483         if (info->references != 0) {
2484                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2485                                   (unsigned int)info->references));
2486                 return -1;
2487         }
2488
2489         iface = ctdb_find_iface(ctdb, info->name);
2490         if (iface == NULL) {
2491                 return -1;
2492         }
2493
2494         if (link_up == iface->link_up) {
2495                 return 0;
2496         }
2497
2498         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2499               ("iface[%s] has changed it's link status %s => %s\n",
2500                iface->name,
2501                iface->link_up?"up":"down",
2502                link_up?"up":"down"));
2503
2504         iface->link_up = link_up;
2505         return 0;
2506 }
2507
2508
2509 /*
2510   called by a daemon to inform us of the entire list of TCP tickles for
2511   a particular public address.
2512   this control should only be sent by the node that is currently serving
2513   that public address.
2514  */
2515 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2516 {
2517         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2518         struct ctdb_tcp_array *tcparray;
2519         struct ctdb_vnn *vnn;
2520
2521         /* We must at least have tickles.num or else we cant verify the size
2522            of the received data blob
2523          */
2524         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2525                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2526                 return -1;
2527         }
2528
2529         /* verify that the size of data matches what we expect */
2530         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2531                          + sizeof(struct ctdb_connection) * list->num) {
2532                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2533                 return -1;
2534         }
2535
2536         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2537                            ctdb_addr_to_str(&list->addr)));
2538
2539         vnn = find_public_ip_vnn(ctdb, &list->addr);
2540         if (vnn == NULL) {
2541                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2542                         ctdb_addr_to_str(&list->addr)));
2543
2544                 return 1;
2545         }
2546
2547         if (vnn->pnn == ctdb->pnn) {
2548                 DEBUG(DEBUG_INFO,
2549                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2550                        ctdb_addr_to_str(&list->addr)));
2551                 return 0;
2552         }
2553
2554         /* remove any old ticklelist we might have */
2555         talloc_free(vnn->tcp_array);
2556         vnn->tcp_array = NULL;
2557
2558         tcparray = talloc(vnn, struct ctdb_tcp_array);
2559         CTDB_NO_MEMORY(ctdb, tcparray);
2560
2561         tcparray->num = list->num;
2562
2563         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2564         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2565
2566         memcpy(tcparray->connections, &list->connections[0],
2567                sizeof(struct ctdb_connection)*tcparray->num);
2568
2569         /* We now have a new fresh tickle list array for this vnn */
2570         vnn->tcp_array = tcparray;
2571
2572         return 0;
2573 }
2574
2575 /*
2576   called to return the full list of tickles for the puclic address associated 
2577   with the provided vnn
2578  */
2579 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2580 {
2581         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2582         struct ctdb_tickle_list_old *list;
2583         struct ctdb_tcp_array *tcparray;
2584         int num, i;
2585         struct ctdb_vnn *vnn;
2586         unsigned port;
2587
2588         vnn = find_public_ip_vnn(ctdb, addr);
2589         if (vnn == NULL) {
2590                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2591                         ctdb_addr_to_str(addr)));
2592
2593                 return 1;
2594         }
2595
2596         port = ctdb_addr_to_port(addr);
2597
2598         tcparray = vnn->tcp_array;
2599         num = 0;
2600         if (tcparray != NULL) {
2601                 if (port == 0) {
2602                         /* All connections */
2603                         num = tcparray->num;
2604                 } else {
2605                         /* Count connections for port */
2606                         for (i = 0; i < tcparray->num; i++) {
2607                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2608                                         num++;
2609                                 }
2610                         }
2611                 }
2612         }
2613
2614         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2615                         + sizeof(struct ctdb_connection) * num;
2616
2617         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2618         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2619         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2620
2621         list->addr = *addr;
2622         list->num = num;
2623
2624         if (num == 0) {
2625                 return 0;
2626         }
2627
2628         num = 0;
2629         for (i = 0; i < tcparray->num; i++) {
2630                 if (port == 0 || \
2631                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2632                         list->connections[num] = tcparray->connections[i];
2633                         num++;
2634                 }
2635         }
2636
2637         return 0;
2638 }
2639
2640
2641 /*
2642   set the list of all tcp tickles for a public address
2643  */
2644 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2645                                             ctdb_sock_addr *addr,
2646                                             struct ctdb_tcp_array *tcparray)
2647 {
2648         int ret, num;
2649         TDB_DATA data;
2650         struct ctdb_tickle_list_old *list;
2651
2652         if (tcparray) {
2653                 num = tcparray->num;
2654         } else {
2655                 num = 0;
2656         }
2657
2658         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2659                         sizeof(struct ctdb_connection) * num;
2660         data.dptr = talloc_size(ctdb, data.dsize);
2661         CTDB_NO_MEMORY(ctdb, data.dptr);
2662
2663         list = (struct ctdb_tickle_list_old *)data.dptr;
2664         list->addr = *addr;
2665         list->num = num;
2666         if (tcparray) {
2667                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2668         }
2669
2670         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2671                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2672                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2673         if (ret != 0) {
2674                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2675                 return -1;
2676         }
2677
2678         talloc_free(data.dptr);
2679
2680         return ret;
2681 }
2682
2683
2684 /*
2685   perform tickle updates if required
2686  */
2687 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2688                                     struct tevent_timer *te,
2689                                     struct timeval t, void *private_data)
2690 {
2691         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2692         int ret;
2693         struct ctdb_vnn *vnn;
2694
2695         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2696                 /* we only send out updates for public addresses that 
2697                    we have taken over
2698                  */
2699                 if (ctdb->pnn != vnn->pnn) {
2700                         continue;
2701                 }
2702                 /* We only send out the updates if we need to */
2703                 if (!vnn->tcp_update_needed) {
2704                         continue;
2705                 }
2706                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2707                                                        &vnn->public_address,
2708                                                        vnn->tcp_array);
2709                 if (ret != 0) {
2710                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2711                                 ctdb_addr_to_str(&vnn->public_address)));
2712                 } else {
2713                         DEBUG(DEBUG_INFO,
2714                               ("Sent tickle update for public address %s\n",
2715                                ctdb_addr_to_str(&vnn->public_address)));
2716                         vnn->tcp_update_needed = false;
2717                 }
2718         }
2719
2720         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2721                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2722                          ctdb_update_tcp_tickles, ctdb);
2723 }
2724
2725 /*
2726   start periodic update of tcp tickles
2727  */
2728 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2729 {
2730         ctdb->tickle_update_context = talloc_new(ctdb);
2731
2732         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2733                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2734                          ctdb_update_tcp_tickles, ctdb);
2735 }
2736
2737
2738
2739
2740 struct control_gratious_arp {
2741         struct ctdb_context *ctdb;
2742         ctdb_sock_addr addr;
2743         const char *iface;
2744         int count;
2745 };
2746
2747 /*
2748   send a control_gratuitous arp
2749  */
2750 static void send_gratious_arp(struct tevent_context *ev,
2751                               struct tevent_timer *te,
2752                               struct timeval t, void *private_data)
2753 {
2754         int ret;
2755         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2756                                                         struct control_gratious_arp);
2757
2758         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2759         if (ret != 0) {
2760                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2761                                  arp->iface, strerror(errno)));
2762         }
2763
2764
2765         arp->count++;
2766         if (arp->count == CTDB_ARP_REPEAT) {
2767                 talloc_free(arp);
2768                 return;
2769         }
2770
2771         tevent_add_timer(arp->ctdb->ev, arp,
2772                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2773                          send_gratious_arp, arp);
2774 }
2775
2776
2777 /*
2778   send a gratious arp 
2779  */
2780 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2781 {
2782         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2783         struct control_gratious_arp *arp;
2784
2785         /* verify the size of indata */
2786         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2787                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2788                                  (unsigned)indata.dsize, 
2789                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2790                 return -1;
2791         }
2792         if (indata.dsize != 
2793                 ( offsetof(struct ctdb_addr_info_old, iface)
2794                 + gratious_arp->len ) ){
2795
2796                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2797                         "but should be %u bytes\n", 
2798                          (unsigned)indata.dsize, 
2799                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2800                 return -1;
2801         }
2802
2803
2804         arp = talloc(ctdb, struct control_gratious_arp);
2805         CTDB_NO_MEMORY(ctdb, arp);
2806
2807         arp->ctdb  = ctdb;
2808         arp->addr   = gratious_arp->addr;
2809         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2810         CTDB_NO_MEMORY(ctdb, arp->iface);
2811         arp->count = 0;
2812
2813         tevent_add_timer(arp->ctdb->ev, arp,
2814                          timeval_zero(), send_gratious_arp, arp);
2815
2816         return 0;
2817 }
2818
2819 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2820 {
2821         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2822         int ret;
2823
2824         /* verify the size of indata */
2825         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2826                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2827                 return -1;
2828         }
2829         if (indata.dsize != 
2830                 ( offsetof(struct ctdb_addr_info_old, iface)
2831                 + pub->len ) ){
2832
2833                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2834                         "but should be %u bytes\n", 
2835                          (unsigned)indata.dsize, 
2836                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2837                 return -1;
2838         }
2839
2840         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2841
2842         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2843
2844         if (ret != 0) {
2845                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2846                 return -1;
2847         }
2848
2849         return 0;
2850 }
2851
/*
  state handed to delete_ip_callback()
 */
struct delete_ip_callback_state {
	/* the control request that is answered from the callback */
	struct ctdb_req_control_old *c;
};
2855
2856 /*
2857   called when releaseip event finishes for del_public_address
2858  */
2859 static void delete_ip_callback(struct ctdb_context *ctdb,
2860                                int32_t status, TDB_DATA data,
2861                                const char *errormsg,
2862                                void *private_data)
2863 {
2864         struct delete_ip_callback_state *state =
2865                 talloc_get_type(private_data, struct delete_ip_callback_state);
2866
2867         /* If release failed then fail. */
2868         ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
2869         talloc_free(private_data);
2870 }
2871
2872 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
2873                                         struct ctdb_req_control_old *c,
2874                                         TDB_DATA indata, bool *async_reply)
2875 {
2876         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2877         struct ctdb_vnn *vnn;
2878
2879         /* verify the size of indata */
2880         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2881                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2882                 return -1;
2883         }
2884         if (indata.dsize != 
2885                 ( offsetof(struct ctdb_addr_info_old, iface)
2886                 + pub->len ) ){
2887
2888                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2889                         "but should be %u bytes\n", 
2890                          (unsigned)indata.dsize, 
2891                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2892                 return -1;
2893         }
2894
2895         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2896
2897         /* walk over all public addresses until we find a match */
2898         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2899                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2900                         if (vnn->pnn == ctdb->pnn) {
2901                                 struct delete_ip_callback_state *state;
2902                                 struct ctdb_public_ip *ip;
2903                                 TDB_DATA data;
2904                                 int ret;
2905
2906                                 vnn->delete_pending = true;
2907
2908                                 state = talloc(ctdb,
2909                                                struct delete_ip_callback_state);
2910                                 CTDB_NO_MEMORY(ctdb, state);
2911                                 state->c = c;
2912
2913                                 ip = talloc(state, struct ctdb_public_ip);
2914                                 if (ip == NULL) {
2915                                         DEBUG(DEBUG_ERR,
2916                                               (__location__ " Out of memory\n"));
2917                                         talloc_free(state);
2918                                         return -1;
2919                                 }
2920                                 ip->pnn = -1;
2921                                 ip->addr = pub->addr;
2922
2923                                 data.dsize = sizeof(struct ctdb_public_ip);
2924                                 data.dptr = (unsigned char *)ip;
2925
2926                                 ret = ctdb_daemon_send_control(ctdb,
2927                                                                ctdb_get_pnn(ctdb),
2928                                                                0,
2929                                                                CTDB_CONTROL_RELEASE_IP,
2930                                                                0, 0,
2931                                                                data,
2932                                                                delete_ip_callback,
2933                                                                state);
2934                                 if (ret == -1) {
2935                                         DEBUG(DEBUG_ERR,
2936                                               (__location__ "Unable to send "
2937                                                "CTDB_CONTROL_RELEASE_IP\n"));
2938                                         talloc_free(state);
2939                                         return -1;
2940                                 }
2941
2942                                 state->c = talloc_steal(state, c);
2943                                 *async_reply = true;
2944                         } else {
2945                                 /* This IP is not hosted on the
2946                                  * current node so just delete it
2947                                  * now. */
2948                                 do_delete_ip(ctdb, vnn);
2949                         }
2950
2951                         return 0;
2952                 }
2953         }
2954
2955         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2956                          ctdb_addr_to_str(&pub->addr)));
2957         return -1;
2958 }
2959
2960
/*
  state handed to ctdb_ipreallocated_callback()
 */
struct ipreallocated_callback_state {
	/* the control request that is answered from the callback */
	struct ctdb_req_control_old *c;
};
2964
2965 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2966                                         int status, void *p)
2967 {
2968         struct ipreallocated_callback_state *state =
2969                 talloc_get_type(p, struct ipreallocated_callback_state);
2970
2971         if (status != 0) {
2972                 DEBUG(DEBUG_ERR,
2973                       (" \"ipreallocated\" event script failed (status %d)\n",
2974                        status));
2975                 if (status == -ETIME) {
2976                         ctdb_ban_self(ctdb);
2977                 }
2978         }
2979
2980         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2981         talloc_free(state);
2982 }
2983
2984 /* A control to run the ipreallocated event */
2985 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2986                                    struct ctdb_req_control_old *c,
2987                                    bool *async_reply)
2988 {
2989         int ret;
2990         struct ipreallocated_callback_state *state;
2991
2992         state = talloc(ctdb, struct ipreallocated_callback_state);
2993         CTDB_NO_MEMORY(ctdb, state);
2994
2995         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2996
2997         ret = ctdb_event_script_callback(ctdb, state,
2998                                          ctdb_ipreallocated_callback, state,
2999                                          CTDB_EVENT_IPREALLOCATED,
3000                                          "%s", "");
3001
3002         if (ret != 0) {
3003                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
3004                 talloc_free(state);
3005                 return -1;
3006         }
3007
3008         /* tell the control that we will be reply asynchronously */
3009         state->c    = talloc_steal(state, c);
3010         *async_reply = true;
3011
3012         return 0;
3013 }
3014
3015
/*
  state of one reloadips run carried out by a child process
 */
struct ctdb_reloadips_handle {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* control request answered from the destructor, or NULL */
	struct ctdb_req_control_old *c;
	/* status sent back to the requester */
	int status;
	/* fd[0] is read by the parent for the one byte result */
	int fd[2];
	/* pid of the child, killed from the destructor */
	pid_t child;
	/* fd event; NOTE(review): presumably watches fd[0] - setup not shown here */
	struct tevent_fd *fde;
};
3024
3025 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
3026 {
3027         if (h == h->ctdb->reload_ips) {
3028                 h->ctdb->reload_ips = NULL;
3029         }
3030         if (h->c != NULL) {
3031                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
3032                 h->c = NULL;
3033         }
3034         ctdb_kill(h->ctdb, h->child, SIGKILL);
3035         return 0;
3036 }
3037
3038 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
3039                                          struct tevent_timer *te,
3040                                          struct timeval t, void *private_data)
3041 {
3042         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3043
3044         talloc_free(h);
3045 }
3046
3047 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
3048                                          struct tevent_fd *fde,
3049                                          uint16_t flags, void *private_data)
3050 {
3051         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
3052
3053         char res;
3054         int ret;
3055
3056         ret = sys_read(h->fd[0], &res, 1);
3057         if (ret < 1 || res != 0) {
3058                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
3059                 res = 1;
3060         }
3061         h->status = res;
3062
3063         talloc_free(h);
3064 }
3065
3066 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
3067 {
3068         TALLOC_CTX *mem_ctx = talloc_new(NULL);
3069         struct ctdb_public_ip_list_old *ips;
3070         struct ctdb_vnn *vnn;
3071         struct client_async_data *async_data;
3072         struct timeval timeout;
3073         TDB_DATA data;
3074         struct ctdb_client_control_state *state;
3075         bool first_add;
3076         int i, ret;
3077
3078         CTDB_NO_MEMORY(ctdb, mem_ctx);
3079
3080         /* Read IPs from local node */
3081         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
3082                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
3083         if (ret != 0) {
3084                 DEBUG(DEBUG_ERR,
3085                       ("Unable to fetch public IPs from local node\n"));
3086                 talloc_free(mem_ctx);
3087                 return -1;
3088         }
3089
3090         /* Read IPs file - this is safe since this is a child process */
3091         ctdb->vnn = NULL;
3092         if (ctdb_set_public_addresses(ctdb, false) != 0) {
3093                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
3094                 talloc_free(mem_ctx);
3095                 return -1;
3096         }
3097
3098         async_data = talloc_zero(mem_ctx, struct client_async_data);
3099         CTDB_NO_MEMORY(ctdb, async_data);
3100
3101         /* Compare IPs between node and file for IPs to be deleted */
3102         for (i = 0; i < ips->num; i++) {
3103                 /* */
3104                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3105                         if (ctdb_same_ip(&vnn->public_address,
3106                                          &ips->ips[i].addr)) {
3107                                 /* IP is still in file */
3108                                 break;
3109                         }
3110                 }
3111
3112                 if (vnn == NULL) {
3113                         /* Delete IP ips->ips[i] */
3114                         struct ctdb_addr_info_old *pub;
3115
3116                         DEBUG(DEBUG_NOTICE,
3117                               ("IP %s no longer configured, deleting it\n",
3118                                ctdb_addr_to_str(&ips->ips[i].addr)));
3119
3120                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
3121                         CTDB_NO_MEMORY(ctdb, pub);
3122
3123                         pub->addr  = ips->ips[i].addr;
3124                         pub->mask  = 0;
3125                         pub->len   = 0;
3126
3127                         timeout = TAKEOVER_TIMEOUT();
3128
3129                         data.dsize = offsetof(struct ctdb_addr_info_old,
3130                                               iface) + pub->len;
3131                         data.dptr = (uint8_t *)pub;
3132
3133                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3134                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
3135                                                   0, data, async_data,
3136                                                   &timeout, NULL);
3137                         if (state == NULL) {
3138                                 DEBUG(DEBUG_ERR,
3139                                       (__location__
3140                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
3141                                 goto failed;
3142                         }
3143
3144                         ctdb_client_async_add(async_data, state);
3145                 }
3146         }
3147
3148         /* Compare IPs between node and file for IPs to be added */
3149         first_add = true;
3150         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
3151                 for (i = 0; i < ips->num; i++) {
3152                         if (ctdb_same_ip(&vnn->public_address,
3153                                          &ips->ips[i].addr)) {
3154                                 /* IP already on node */
3155                                 break;
3156                         }
3157                 }
3158                 if (i == ips->num) {
3159                         /* Add IP ips->ips[i] */
3160                         struct ctdb_addr_info_old *pub;
3161                         const char *ifaces = NULL;
3162                         uint32_t len;
3163                         int iface = 0;
3164
3165                         DEBUG(DEBUG_NOTICE,
3166                               ("New IP %s configured, adding it\n",
3167                                ctdb_addr_to_str(&vnn->public_address)));
3168                         if (first_add) {
3169                                 uint32_t pnn = ctdb_get_pnn(ctdb);
3170
3171                                 data.dsize = sizeof(pnn);
3172                                 data.dptr  = (uint8_t *)&pnn;
3173
3174                                 ret = ctdb_client_send_message(
3175                                         ctdb,
3176                                         CTDB_BROADCAST_CONNECTED,
3177                                         CTDB_SRVID_REBALANCE_NODE,
3178                                         data);
3179                                 if (ret != 0) {
3180                                         DEBUG(DEBUG_WARNING,
3181                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3182                                 }
3183
3184                                 first_add = false;
3185                         }
3186
3187                         ifaces = vnn->ifaces[0];
3188                         iface = 1;
3189                         while (vnn->ifaces[iface] != NULL) {
3190                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3191                                                          vnn->ifaces[iface]);
3192                                 iface++;
3193                         }
3194
3195                         len   = strlen(ifaces) + 1;
3196                         pub = talloc_zero_size(mem_ctx,
3197                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3198                         CTDB_NO_MEMORY(ctdb, pub);
3199
3200                         pub->addr  = vnn->public_address;
3201                         pub->mask  = vnn->public_netmask_bits;
3202                         pub->len   = len;
3203                         memcpy(&pub->iface[0], ifaces, pub->len);
3204
3205                         timeout = TAKEOVER_TIMEOUT();
3206
3207                         data.dsize = offsetof(struct ctdb_addr_info_old,
3208                                               iface) + pub->len;
3209                         data.dptr = (uint8_t *)pub;
3210
3211                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3212                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3213                                                   0, data, async_data,
3214                                                   &timeout, NULL);
3215                         if (state == NULL) {
3216                                 DEBUG(DEBUG_ERR,
3217                                       (__location__
3218                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3219                                 goto failed;
3220                         }
3221
3222                         ctdb_client_async_add(async_data, state);
3223                 }
3224         }
3225
3226         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3227                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3228                 goto failed;
3229         }
3230
3231         talloc_free(mem_ctx);
3232         return 0;
3233
3234 failed:
3235         talloc_free(mem_ctx);
3236         return -1;
3237 }
3238
3239 /* This control is sent to force the node to re-read the public addresses file
3240    and drop any addresses we should nnot longer host, and add new addresses
3241    that we are now able to host
3242 */
3243 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3244 {
3245         struct ctdb_reloadips_handle *h;
3246         pid_t parent = getpid();
3247
3248         if (ctdb->reload_ips != NULL) {
3249                 talloc_free(ctdb->reload_ips);
3250                 ctdb->reload_ips = NULL;
3251         }
3252
3253         h = talloc(ctdb, struct ctdb_reloadips_handle);
3254         CTDB_NO_MEMORY(ctdb, h);
3255         h->ctdb     = ctdb;
3256         h->c        = NULL;
3257         h->status   = -1;
3258         
3259         if (pipe(h->fd) == -1) {
3260                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3261                 talloc_free(h);
3262                 return -1;
3263         }
3264
3265         h->child = ctdb_fork(ctdb);
3266         if (h->child == (pid_t)-1) {
3267                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3268                 close(h->fd[0]);
3269                 close(h->fd[1]);
3270                 talloc_free(h);
3271                 return -1;
3272         }
3273
3274         /* child process */
3275         if (h->child == 0) {
3276                 signed char res = 0;
3277
3278                 close(h->fd[0]);
3279                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3280
3281                 prctl_set_comment("ctdb_reloadips");
3282                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3283                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3284                         res = -1;
3285                 } else {
3286                         res = ctdb_reloadips_child(ctdb);
3287                         if (res != 0) {
3288                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3289                         }
3290                 }
3291
3292                 sys_write(h->fd[1], &res, 1);
3293                 ctdb_wait_for_process_to_exit(parent);
3294                 _exit(0);
3295         }
3296
3297         h->c             = talloc_steal(h, c);
3298
3299         close(h->fd[1]);
3300         set_close_on_exec(h->fd[0]);
3301
3302         talloc_set_destructor(h, ctdb_reloadips_destructor);
3303
3304
3305         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3306                                ctdb_reloadips_child_handler, (void *)h);
3307         tevent_fd_set_auto_close(h->fde);
3308
3309         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3310                          ctdb_reloadips_timeout_event, h);
3311
3312         /* we reply later */
3313         *async_reply = true;
3314         return 0;
3315 }