ctdb-daemon: Rename takeover_callback_state -> release_ip_callback_state
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Timeout value derived from the takeover_timeout tunable.  The macro
 * expands to an expression that reads a variable named "ctdb", so one
 * must be in scope wherever it is used.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/*
 * Gratuitous ARP schedule: CTDB_ARP_INTERVAL is the first argument
 * given to timeval_current_ofs() when re-arming the ARP timer, and the
 * ARP state is freed once its send count equals CTDB_ARP_REPEAT.
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
50
/* One network interface, kept on the ctdb->ifaces list. */
struct ctdb_interface {
	/* List linkage */
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* Interface name, compared with strcmp() on lookup */
	const char *name;
	/* Interfaces with link_up false are skipped when picking one */
	bool link_up;
	/* Count of public addresses currently assigned to this interface */
	uint32_t references;
};
57
58 /* state associated with a public ip address */
59 struct ctdb_vnn {
60         struct ctdb_vnn *prev, *next;
61
62         struct ctdb_interface *iface;
63         const char **ifaces;
64         ctdb_sock_addr public_address;
65         uint8_t public_netmask_bits;
66
67         /* the node number that is serving this public address, if any.
68            If no node serves this ip it is set to -1 */
69         int32_t pnn;
70
71         /* List of clients to tickle for this public address */
72         struct ctdb_tcp_array *tcp_array;
73
74         /* whether we need to update the other nodes with changes to our list
75            of connected clients */
76         bool tcp_update_needed;
77
78         /* a context to hang sending gratious arp events off */
79         TALLOC_CTX *takeover_ctx;
80
81         /* Set to true any time an update to this VNN is in flight.
82            This helps to avoid races. */
83         bool update_in_flight;
84
85         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86          * address then this flag is set.  It will be deleted in the
87          * release IP callback. */
88         bool delete_pending;
89 };
90
91 static const char *iface_string(const struct ctdb_interface *iface)
92 {
93         return (iface != NULL ? iface->name : "__none__");
94 }
95
96 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
97 {
98         return iface_string(vnn->iface);
99 }
100
101 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
102 {
103         struct ctdb_interface *i;
104
105         if (strlen(iface) > CTDB_IFACE_SIZE) {
106                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
107                 return -1;
108         }
109
110         /* Verify that we don't have an entry for this ip yet */
111         for (i=ctdb->ifaces;i;i=i->next) {
112                 if (strcmp(i->name, iface) == 0) {
113                         return 0;
114                 }
115         }
116
117         /* create a new structure for this interface */
118         i = talloc_zero(ctdb, struct ctdb_interface);
119         CTDB_NO_MEMORY_FATAL(ctdb, i);
120         i->name = talloc_strdup(i, iface);
121         CTDB_NO_MEMORY(ctdb, i->name);
122
123         i->link_up = true;
124
125         DLIST_ADD(ctdb->ifaces, i);
126
127         return 0;
128 }
129
130 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
131                                         const char *name)
132 {
133         int n;
134
135         for (n = 0; vnn->ifaces[n] != NULL; n++) {
136                 if (strcmp(name, vnn->ifaces[n]) == 0) {
137                         return true;
138                 }
139         }
140
141         return false;
142 }
143
144 /* If any interfaces now have no possible IPs then delete them.  This
145  * implementation is naive (i.e. simple) rather than clever
146  * (i.e. complex).  Given that this is run on delip and that operation
147  * is rare, this doesn't need to be efficient - it needs to be
148  * foolproof.  One alternative is reference counting, where the logic
149  * is distributed and can, therefore, be broken in multiple places.
150  * Another alternative is to build a red-black tree of interfaces that
151  * can have addresses (by walking ctdb->vnn once) and then walking
152  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
153  * one of those if the naive implementation causes problems...  :-)
154  */
155 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
156                                         struct ctdb_vnn *vnn)
157 {
158         struct ctdb_interface *i, *next;
159
160         /* For each interface, check if there's an IP using it. */
161         for (i = ctdb->ifaces; i != NULL; i = next) {
162                 struct ctdb_vnn *tv;
163                 bool found;
164                 next = i->next;
165
166                 /* Only consider interfaces named in the given VNN. */
167                 if (!vnn_has_interface_with_name(vnn, i->name)) {
168                         continue;
169                 }
170
171                 /* Search for a vnn with this interface. */
172                 found = false;
173                 for (tv=ctdb->vnn; tv; tv=tv->next) {
174                         if (vnn_has_interface_with_name(tv, i->name)) {
175                                 found = true;
176                                 break;
177                         }
178                 }
179
180                 if (!found) {
181                         /* None of the VNNs are using this interface. */
182                         DLIST_REMOVE(ctdb->ifaces, i);
183                         talloc_free(i);
184                 }
185         }
186 }
187
188
189 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
190                                               const char *iface)
191 {
192         struct ctdb_interface *i;
193
194         for (i=ctdb->ifaces;i;i=i->next) {
195                 if (strcmp(i->name, iface) == 0) {
196                         return i;
197                 }
198         }
199
200         return NULL;
201 }
202
203 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
204                                                   struct ctdb_vnn *vnn)
205 {
206         int i;
207         struct ctdb_interface *cur = NULL;
208         struct ctdb_interface *best = NULL;
209
210         for (i=0; vnn->ifaces[i]; i++) {
211
212                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
213                 if (cur == NULL) {
214                         continue;
215                 }
216
217                 if (!cur->link_up) {
218                         continue;
219                 }
220
221                 if (best == NULL) {
222                         best = cur;
223                         continue;
224                 }
225
226                 if (cur->references < best->references) {
227                         best = cur;
228                         continue;
229                 }
230         }
231
232         return best;
233 }
234
235 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
236                                      struct ctdb_vnn *vnn)
237 {
238         struct ctdb_interface *best = NULL;
239
240         if (vnn->iface) {
241                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
242                                    "still assigned to iface '%s'\n",
243                                    ctdb_addr_to_str(&vnn->public_address),
244                                    ctdb_vnn_iface_string(vnn)));
245                 return 0;
246         }
247
248         best = ctdb_vnn_best_iface(ctdb, vnn);
249         if (best == NULL) {
250                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
251                                   "cannot assign to iface any iface\n",
252                                   ctdb_addr_to_str(&vnn->public_address)));
253                 return -1;
254         }
255
256         vnn->iface = best;
257         best->references++;
258         vnn->pnn = ctdb->pnn;
259
260         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
261                            "now assigned to iface '%s' refs[%d]\n",
262                            ctdb_addr_to_str(&vnn->public_address),
263                            ctdb_vnn_iface_string(vnn),
264                            best->references));
265         return 0;
266 }
267
268 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
269                                     struct ctdb_vnn *vnn)
270 {
271         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
272                            "now unassigned (old iface '%s' refs[%d])\n",
273                            ctdb_addr_to_str(&vnn->public_address),
274                            ctdb_vnn_iface_string(vnn),
275                            vnn->iface?vnn->iface->references:0));
276         if (vnn->iface) {
277                 vnn->iface->references--;
278         }
279         vnn->iface = NULL;
280         if (vnn->pnn == ctdb->pnn) {
281                 vnn->pnn = -1;
282         }
283 }
284
285 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
286                                struct ctdb_vnn *vnn)
287 {
288         int i;
289
290         /* Nodes that are not RUNNING can not host IPs */
291         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
292                 return false;
293         }
294
295         if (vnn->delete_pending) {
296                 return false;
297         }
298
299         if (vnn->iface && vnn->iface->link_up) {
300                 return true;
301         }
302
303         for (i=0; vnn->ifaces[i]; i++) {
304                 struct ctdb_interface *cur;
305
306                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
307                 if (cur == NULL) {
308                         continue;
309                 }
310
311                 if (cur->link_up) {
312                         return true;
313                 }
314         }
315
316         return false;
317 }
318
319 struct ctdb_takeover_arp {
320         struct ctdb_context *ctdb;
321         uint32_t count;
322         ctdb_sock_addr addr;
323         struct ctdb_tcp_array *tcparray;
324         struct ctdb_vnn *vnn;
325 };
326
327
328 /*
329   lists of tcp endpoints
330  */
331 struct ctdb_tcp_list {
332         struct ctdb_tcp_list *prev, *next;
333         struct ctdb_connection connection;
334 };
335
336 /*
337   list of clients to kill on IP release
338  */
339 struct ctdb_client_ip {
340         struct ctdb_client_ip *prev, *next;
341         struct ctdb_context *ctdb;
342         ctdb_sock_addr addr;
343         uint32_t client_id;
344 };
345
346
347 /*
348   send a gratuitous arp
349  */
350 static void ctdb_control_send_arp(struct tevent_context *ev,
351                                   struct tevent_timer *te,
352                                   struct timeval t, void *private_data)
353 {
354         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
355                                                         struct ctdb_takeover_arp);
356         int i, ret;
357         struct ctdb_tcp_array *tcparray;
358         const char *iface = ctdb_vnn_iface_string(arp->vnn);
359
360         ret = ctdb_sys_send_arp(&arp->addr, iface);
361         if (ret != 0) {
362                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
363                                   iface, strerror(errno)));
364         }
365
366         tcparray = arp->tcparray;
367         if (tcparray) {
368                 for (i=0;i<tcparray->num;i++) {
369                         struct ctdb_connection *tcon;
370
371                         tcon = &tcparray->connections[i];
372                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
373                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
374                                 ctdb_addr_to_str(&tcon->src),
375                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
376                         ret = ctdb_sys_send_tcp(
377                                 &tcon->src,
378                                 &tcon->dst,
379                                 0, 0, 0);
380                         if (ret != 0) {
381                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
382                                         ctdb_addr_to_str(&tcon->src)));
383                         }
384                 }
385         }
386
387         arp->count++;
388
389         if (arp->count == CTDB_ARP_REPEAT) {
390                 talloc_free(arp);
391                 return;
392         }
393
394         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
395                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
396                          ctdb_control_send_arp, arp);
397 }
398
399 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
400                                        struct ctdb_vnn *vnn)
401 {
402         struct ctdb_takeover_arp *arp;
403         struct ctdb_tcp_array *tcparray;
404
405         if (!vnn->takeover_ctx) {
406                 vnn->takeover_ctx = talloc_new(vnn);
407                 if (!vnn->takeover_ctx) {
408                         return -1;
409                 }
410         }
411
412         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
413         if (!arp) {
414                 return -1;
415         }
416
417         arp->ctdb = ctdb;
418         arp->addr = vnn->public_address;
419         arp->vnn  = vnn;
420
421         tcparray = vnn->tcp_array;
422         if (tcparray) {
423                 /* add all of the known tcp connections for this IP to the
424                    list of tcp connections to send tickle acks for */
425                 arp->tcparray = talloc_steal(arp, tcparray);
426
427                 vnn->tcp_array = NULL;
428                 vnn->tcp_update_needed = true;
429         }
430
431         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
432                          timeval_zero(), ctdb_control_send_arp, arp);
433
434         return 0;
435 }
436
/* State carried from ctdb_do_takeip() to its event script callback */
struct ctdb_do_takeip_state {
	/* Control request that gets the reply */
	struct ctdb_req_control_old *c;
	/* Address being taken over */
	struct ctdb_vnn *vnn;
};
441
442 /*
443   called when takeip event finishes
444  */
445 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
446                                     void *private_data)
447 {
448         struct ctdb_do_takeip_state *state =
449                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
450         int32_t ret;
451         TDB_DATA data;
452
453         if (status != 0) {
454                 if (status == -ETIME) {
455                         ctdb_ban_self(ctdb);
456                 }
457                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
458                                  ctdb_addr_to_str(&state->vnn->public_address),
459                                  ctdb_vnn_iface_string(state->vnn)));
460                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
461
462                 talloc_free(state);
463                 return;
464         }
465
466         if (ctdb->do_checkpublicip) {
467
468         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
469         if (ret != 0) {
470                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
471                 talloc_free(state);
472                 return;
473         }
474
475         }
476
477         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
478         data.dsize = strlen((char *)data.dptr) + 1;
479         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
480
481         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
482
483
484         /* the control succeeded */
485         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
486         talloc_free(state);
487         return;
488 }
489
490 static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
491 {
492         state->vnn->update_in_flight = false;
493         return 0;
494 }
495
496 /*
497   take over an ip address
498  */
499 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
500                               struct ctdb_req_control_old *c,
501                               struct ctdb_vnn *vnn)
502 {
503         int ret;
504         struct ctdb_do_takeip_state *state;
505
506         if (vnn->update_in_flight) {
507                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
508                                     "update for this IP already in flight\n",
509                                     ctdb_addr_to_str(&vnn->public_address),
510                                     vnn->public_netmask_bits));
511                 return -1;
512         }
513
514         ret = ctdb_vnn_assign_iface(ctdb, vnn);
515         if (ret != 0) {
516                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
517                                  "assign a usable interface\n",
518                                  ctdb_addr_to_str(&vnn->public_address),
519                                  vnn->public_netmask_bits));
520                 return -1;
521         }
522
523         state = talloc(vnn, struct ctdb_do_takeip_state);
524         CTDB_NO_MEMORY(ctdb, state);
525
526         state->c = talloc_steal(ctdb, c);
527         state->vnn   = vnn;
528
529         vnn->update_in_flight = true;
530         talloc_set_destructor(state, ctdb_takeip_destructor);
531
532         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
533                             ctdb_addr_to_str(&vnn->public_address),
534                             vnn->public_netmask_bits,
535                             ctdb_vnn_iface_string(vnn)));
536
537         ret = ctdb_event_script_callback(ctdb,
538                                          state,
539                                          ctdb_do_takeip_callback,
540                                          state,
541                                          CTDB_EVENT_TAKE_IP,
542                                          "%s %s %u",
543                                          ctdb_vnn_iface_string(vnn),
544                                          ctdb_addr_to_str(&vnn->public_address),
545                                          vnn->public_netmask_bits);
546
547         if (ret != 0) {
548                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
549                         ctdb_addr_to_str(&vnn->public_address),
550                         ctdb_vnn_iface_string(vnn)));
551                 talloc_free(state);
552                 return -1;
553         }
554
555         return 0;
556 }
557
/* State carried from ctdb_do_updateip() to its event script callback */
struct ctdb_do_updateip_state {
	/* Control request that gets the reply */
	struct ctdb_req_control_old *c;
	/* Interface the address was on before the update, may be NULL */
	struct ctdb_interface *old;
	/* Address being moved */
	struct ctdb_vnn *vnn;
};
563
564 /*
565   called when updateip event finishes
566  */
567 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
568                                       void *private_data)
569 {
570         struct ctdb_do_updateip_state *state =
571                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
572         int32_t ret;
573
574         if (status != 0) {
575                 if (status == -ETIME) {
576                         ctdb_ban_self(ctdb);
577                 }
578                 DEBUG(DEBUG_ERR,
579                       ("Failed update of IP %s from interface %s to %s\n",
580                        ctdb_addr_to_str(&state->vnn->public_address),
581                        iface_string(state->old),
582                        ctdb_vnn_iface_string(state->vnn)));
583
584                 /*
585                  * All we can do is reset the old interface
586                  * and let the next run fix it
587                  */
588                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
589                 state->vnn->iface = state->old;
590                 state->vnn->iface->references++;
591
592                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
593                 talloc_free(state);
594                 return;
595         }
596
597         if (ctdb->do_checkpublicip) {
598
599         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
600         if (ret != 0) {
601                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
602                 talloc_free(state);
603                 return;
604         }
605
606         }
607
608         /* the control succeeded */
609         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
610         talloc_free(state);
611         return;
612 }
613
614 static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
615 {
616         state->vnn->update_in_flight = false;
617         return 0;
618 }
619
620 /*
621   update (move) an ip address
622  */
623 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
624                                 struct ctdb_req_control_old *c,
625                                 struct ctdb_vnn *vnn)
626 {
627         int ret;
628         struct ctdb_do_updateip_state *state;
629         struct ctdb_interface *old = vnn->iface;
630         const char *old_name = iface_string(old);
631         const char *new_name;
632
633         if (vnn->update_in_flight) {
634                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
635                                     "update for this IP already in flight\n",
636                                     ctdb_addr_to_str(&vnn->public_address),
637                                     vnn->public_netmask_bits));
638                 return -1;
639         }
640
641         ctdb_vnn_unassign_iface(ctdb, vnn);
642         ret = ctdb_vnn_assign_iface(ctdb, vnn);
643         if (ret != 0) {
644                 DEBUG(DEBUG_ERR,("Update of IP %s/%u failed to "
645                                  "assign a usable interface (old iface '%s')\n",
646                                  ctdb_addr_to_str(&vnn->public_address),
647                                  vnn->public_netmask_bits,
648                                  old_name));
649                 return -1;
650         }
651
652         new_name = ctdb_vnn_iface_string(vnn);
653         if (old_name != NULL && new_name != NULL &&
654             strcmp(old_name, new_name) == 0) {
655                 /* A benign update from one interface onto itself.
656                  * no need to run the eventscripts in this case, just return
657                  * success.
658                  */
659                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
660                 return 0;
661         }
662
663         state = talloc(vnn, struct ctdb_do_updateip_state);
664         CTDB_NO_MEMORY(ctdb, state);
665
666         state->c = talloc_steal(ctdb, c);
667         state->old = old;
668         state->vnn = vnn;
669
670         vnn->update_in_flight = true;
671         talloc_set_destructor(state, ctdb_updateip_destructor);
672
673         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
674                             "interface %s to %s\n",
675                             ctdb_addr_to_str(&vnn->public_address),
676                             vnn->public_netmask_bits,
677                             old_name,
678                             new_name));
679
680         ret = ctdb_event_script_callback(ctdb,
681                                          state,
682                                          ctdb_do_updateip_callback,
683                                          state,
684                                          CTDB_EVENT_UPDATE_IP,
685                                          "%s %s %s %u",
686                                          old_name,
687                                          new_name,
688                                          ctdb_addr_to_str(&vnn->public_address),
689                                          vnn->public_netmask_bits);
690         if (ret != 0) {
691                 DEBUG(DEBUG_ERR,
692                       ("Failed update IP %s from interface %s to %s\n",
693                        ctdb_addr_to_str(&vnn->public_address),
694                        old_name, new_name));
695                 talloc_free(state);
696                 return -1;
697         }
698
699         return 0;
700 }
701
702 /*
703   Find the vnn of the node that has a public ip address
704   returns -1 if the address is not known as a public address
705  */
706 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
707 {
708         struct ctdb_vnn *vnn;
709
710         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
711                 if (ctdb_same_ip(&vnn->public_address, addr)) {
712                         return vnn;
713                 }
714         }
715
716         return NULL;
717 }
718
719 /*
720   take over an ip address
721  */
722 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
723                                  struct ctdb_req_control_old *c,
724                                  TDB_DATA indata,
725                                  bool *async_reply)
726 {
727         int ret;
728         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
729         struct ctdb_vnn *vnn;
730         bool have_ip = false;
731         bool do_updateip = false;
732         bool do_takeip = false;
733         struct ctdb_interface *best_iface = NULL;
734
735         if (pip->pnn != ctdb->pnn) {
736                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
737                                  "with pnn %d, but we're node %d\n",
738                                  ctdb_addr_to_str(&pip->addr),
739                                  pip->pnn, ctdb->pnn));
740                 return -1;
741         }
742
743         /* update out vnn list */
744         vnn = find_public_ip_vnn(ctdb, &pip->addr);
745         if (vnn == NULL) {
746                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
747                         ctdb_addr_to_str(&pip->addr)));
748                 return 0;
749         }
750
751         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
752                 have_ip = ctdb_sys_have_ip(&pip->addr);
753         }
754         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
755         if (best_iface == NULL) {
756                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
757                                  "a usable interface (old %s, have_ip %d)\n",
758                                  ctdb_addr_to_str(&vnn->public_address),
759                                  vnn->public_netmask_bits,
760                                  ctdb_vnn_iface_string(vnn),
761                                  have_ip));
762                 return -1;
763         }
764
765         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
766                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
767                                   "and we have it on iface[%s], but it was assigned to node %d"
768                                   "and we are node %d, banning ourself\n",
769                                  ctdb_addr_to_str(&vnn->public_address),
770                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
771                 ctdb_ban_self(ctdb);
772                 return -1;
773         }
774
775         if (vnn->pnn == -1 && have_ip) {
776                 /* This will cause connections to be reset and
777                  * reestablished.  However, this is a very unusual
778                  * situation and doing this will completely repair the
779                  * inconsistency in the VNN.
780                  */
781                 DEBUG(DEBUG_WARNING,
782                       (__location__
783                        " Doing updateip for IP %s already on an interface\n",
784                        ctdb_addr_to_str(&vnn->public_address)));
785                 do_updateip = true;
786         }
787
788         if (vnn->iface) {
789                 if (vnn->iface != best_iface) {
790                         if (!vnn->iface->link_up) {
791                                 do_updateip = true;
792                         } else if (vnn->iface->references > (best_iface->references + 1)) {
793                                 /* only move when the rebalance gains something */
794                                         do_updateip = true;
795                         }
796                 }
797         }
798
799         if (!have_ip) {
800                 if (do_updateip) {
801                         ctdb_vnn_unassign_iface(ctdb, vnn);
802                         do_updateip = false;
803                 }
804                 do_takeip = true;
805         }
806
807         if (do_takeip) {
808                 ret = ctdb_do_takeip(ctdb, c, vnn);
809                 if (ret != 0) {
810                         return -1;
811                 }
812         } else if (do_updateip) {
813                 ret = ctdb_do_updateip(ctdb, c, vnn);
814                 if (ret != 0) {
815                         return -1;
816                 }
817         } else {
818                 /*
819                  * The interface is up and the kernel known the ip
820                  * => do nothing
821                  */
822                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
823                         ctdb_addr_to_str(&pip->addr),
824                         vnn->public_netmask_bits,
825                         ctdb_vnn_iface_string(vnn)));
826                 return 0;
827         }
828
829         /* tell ctdb_control.c that we will be replying asynchronously */
830         *async_reply = true;
831
832         return 0;
833 }
834
835 static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
836 {
837         DLIST_REMOVE(ctdb->vnn, vnn);
838         ctdb_vnn_unassign_iface(ctdb, vnn);
839         ctdb_remove_orphaned_ifaces(ctdb, vnn);
840         talloc_free(vnn);
841 }
842
843 static struct ctdb_vnn *release_ip_post(struct ctdb_context *ctdb,
844                                         struct ctdb_vnn *vnn,
845                                         ctdb_sock_addr *addr)
846 {
847         TDB_DATA data;
848
849         /* Send a message to all clients of this node telling them
850          * that the cluster has been reconfigured and they should
851          * close any connections on this IP address
852          */
853         data.dptr = (uint8_t *)ctdb_addr_to_str(addr);
854         data.dsize = strlen((char *)data.dptr)+1;
855         DEBUG(DEBUG_INFO, ("Sending RELEASE_IP message for %s\n", data.dptr));
856         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
857
858         ctdb_vnn_unassign_iface(ctdb, vnn);
859
860         /* Process the IP if it has been marked for deletion */
861         if (vnn->delete_pending) {
862                 do_delete_ip(ctdb, vnn);
863                 return NULL;
864         }
865
866         return vnn;
867 }
868
/*
 * State handed from ctdb_control_release_ip() to
 * release_ip_callback() via the "releaseip" event.
 */
struct release_ip_callback_state {
        /* control request that release_ip_callback() replies to */
        struct ctdb_req_control_old *c;
        /* copy of the address being released, allocated off this state */
        ctdb_sock_addr *addr;
        /*
         * vnn of the address; set to NULL by release_ip_callback()
         * when release_ip_post() deleted it, and checked by the
         * destructor before clearing update_in_flight
         */
        struct ctdb_vnn *vnn;
};
874
875 /*
876   called when releaseip event finishes
877  */
878 static void release_ip_callback(struct ctdb_context *ctdb, int status,
879                                 void *private_data)
880 {
881         struct release_ip_callback_state *state =
882                 talloc_get_type(private_data, struct release_ip_callback_state);
883
884         if (status == -ETIME) {
885                 ctdb_ban_self(ctdb);
886         }
887
888         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
889                 if  (ctdb_sys_have_ip(state->addr)) {
890                         DEBUG(DEBUG_ERR,
891                               ("IP %s still hosted during release IP callback, failing\n",
892                                ctdb_addr_to_str(state->addr)));
893                         ctdb_request_control_reply(ctdb, state->c,
894                                                    NULL, -1, NULL);
895                         talloc_free(state);
896                         return;
897                 }
898         }
899
900         state->vnn = release_ip_post(ctdb, state->vnn, state->addr);
901
902         /* the control succeeded */
903         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
904         talloc_free(state);
905 }
906
907 static int ctdb_releaseip_destructor(struct release_ip_callback_state *state)
908 {
909         if (state->vnn != NULL) {
910                 state->vnn->update_in_flight = false;
911         }
912         return 0;
913 }
914
915 /*
916   release an ip address
917  */
918 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
919                                 struct ctdb_req_control_old *c,
920                                 TDB_DATA indata, 
921                                 bool *async_reply)
922 {
923         int ret;
924         struct release_ip_callback_state *state;
925         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
926         struct ctdb_vnn *vnn;
927         char *iface;
928
929         /* update our vnn list */
930         vnn = find_public_ip_vnn(ctdb, &pip->addr);
931         if (vnn == NULL) {
932                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
933                         ctdb_addr_to_str(&pip->addr)));
934                 return 0;
935         }
936         vnn->pnn = pip->pnn;
937
938         /* stop any previous arps */
939         talloc_free(vnn->takeover_ctx);
940         vnn->takeover_ctx = NULL;
941
942         /* Some ctdb tool commands (e.g. moveip) send
943          * lazy multicast to drop an IP from any node that isn't the
944          * intended new node.  The following causes makes ctdbd ignore
945          * a release for any address it doesn't host.
946          */
947         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
948                 if (!ctdb_sys_have_ip(&pip->addr)) {
949                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
950                                 ctdb_addr_to_str(&pip->addr),
951                                 vnn->public_netmask_bits,
952                                 ctdb_vnn_iface_string(vnn)));
953                         ctdb_vnn_unassign_iface(ctdb, vnn);
954                         return 0;
955                 }
956         } else {
957                 if (vnn->iface == NULL) {
958                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
959                                            ctdb_addr_to_str(&pip->addr),
960                                            vnn->public_netmask_bits));
961                         return 0;
962                 }
963         }
964
965         /* There is a potential race between take_ip and us because we
966          * update the VNN via a callback that run when the
967          * eventscripts have been run.  Avoid the race by allowing one
968          * update to be in flight at a time.
969          */
970         if (vnn->update_in_flight) {
971                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
972                                     "update for this IP already in flight\n",
973                                     ctdb_addr_to_str(&vnn->public_address),
974                                     vnn->public_netmask_bits));
975                 return -1;
976         }
977
978         iface = strdup(ctdb_vnn_iface_string(vnn));
979
980         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
981                 ctdb_addr_to_str(&pip->addr),
982                 vnn->public_netmask_bits,
983                 iface,
984                 pip->pnn));
985
986         state = talloc(ctdb, struct release_ip_callback_state);
987         if (state == NULL) {
988                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
989                                __FILE__, __LINE__);
990                 free(iface);
991                 return -1;
992         }
993
994         state->c = talloc_steal(state, c);
995         state->addr = talloc(state, ctdb_sock_addr);       
996         if (state->addr == NULL) {
997                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
998                                __FILE__, __LINE__);
999                 free(iface);
1000                 talloc_free(state);
1001                 return -1;
1002         }
1003         *state->addr = pip->addr;
1004         state->vnn   = vnn;
1005
1006         vnn->update_in_flight = true;
1007         talloc_set_destructor(state, ctdb_releaseip_destructor);
1008
1009         ret = ctdb_event_script_callback(ctdb, 
1010                                          state, release_ip_callback, state,
1011                                          CTDB_EVENT_RELEASE_IP,
1012                                          "%s %s %u",
1013                                          iface,
1014                                          ctdb_addr_to_str(&pip->addr),
1015                                          vnn->public_netmask_bits);
1016         free(iface);
1017         if (ret != 0) {
1018                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1019                         ctdb_addr_to_str(&pip->addr),
1020                         ctdb_vnn_iface_string(vnn)));
1021                 talloc_free(state);
1022                 return -1;
1023         }
1024
1025         /* tell the control that we will be reply asynchronously */
1026         *async_reply = true;
1027         return 0;
1028 }
1029
1030 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1031                                    ctdb_sock_addr *addr,
1032                                    unsigned mask, const char *ifaces,
1033                                    bool check_address)
1034 {
1035         struct ctdb_vnn      *vnn;
1036         uint32_t num = 0;
1037         char *tmp;
1038         const char *iface;
1039         int i;
1040         int ret;
1041
1042         tmp = strdup(ifaces);
1043         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1044                 if (!ctdb_sys_check_iface_exists(iface)) {
1045                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1046                         free(tmp);
1047                         return -1;
1048                 }
1049         }
1050         free(tmp);
1051
1052         /* Verify that we don't have an entry for this ip yet */
1053         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1054                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1055                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1056                                 ctdb_addr_to_str(addr)));
1057                         return -1;
1058                 }               
1059         }
1060
1061         /* create a new vnn structure for this ip address */
1062         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1063         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1064         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1065         tmp = talloc_strdup(vnn, ifaces);
1066         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1067         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1068                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1069                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1070                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1071                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1072                 num++;
1073         }
1074         talloc_free(tmp);
1075         vnn->ifaces[num] = NULL;
1076         vnn->public_address      = *addr;
1077         vnn->public_netmask_bits = mask;
1078         vnn->pnn                 = -1;
1079
1080         for (i=0; vnn->ifaces[i]; i++) {
1081                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1082                 if (ret != 0) {
1083                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1084                                            "for public_address[%s]\n",
1085                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1086                         talloc_free(vnn);
1087                         return -1;
1088                 }
1089         }
1090
1091         DLIST_ADD(ctdb->vnn, vnn);
1092
1093         return 0;
1094 }
1095
1096 /*
1097   setup the public address lists from a file
1098 */
1099 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1100 {
1101         char **lines;
1102         int nlines;
1103         int i;
1104
1105         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1106         if (lines == NULL) {
1107                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1108                 return -1;
1109         }
1110         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1111                 nlines--;
1112         }
1113
1114         for (i=0;i<nlines;i++) {
1115                 unsigned mask;
1116                 ctdb_sock_addr addr;
1117                 const char *addrstr;
1118                 const char *ifaces;
1119                 char *tok, *line;
1120
1121                 line = lines[i];
1122                 while ((*line == ' ') || (*line == '\t')) {
1123                         line++;
1124                 }
1125                 if (*line == '#') {
1126                         continue;
1127                 }
1128                 if (strcmp(line, "") == 0) {
1129                         continue;
1130                 }
1131                 tok = strtok(line, " \t");
1132                 addrstr = tok;
1133                 tok = strtok(NULL, " \t");
1134                 if (tok == NULL) {
1135                         if (NULL == ctdb->default_public_interface) {
1136                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1137                                          i+1));
1138                                 talloc_free(lines);
1139                                 return -1;
1140                         }
1141                         ifaces = ctdb->default_public_interface;
1142                 } else {
1143                         ifaces = tok;
1144                 }
1145
1146                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1147                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1148                         talloc_free(lines);
1149                         return -1;
1150                 }
1151                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1152                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1153                         talloc_free(lines);
1154                         return -1;
1155                 }
1156         }
1157
1158
1159         talloc_free(lines);
1160         return 0;
1161 }
1162
1163 static struct ctdb_public_ip_list *
1164 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1165                              TALLOC_CTX *mem_ctx,
1166                              struct ctdb_node_map_old *nodemap,
1167                              uint32_t public_ip_flags)
1168 {
1169         int j, ret;
1170         struct ctdb_public_ip_list_old *ip_list;
1171         struct ctdb_public_ip_list *public_ips;
1172
1173         public_ips = talloc_zero_array(mem_ctx,
1174                                        struct ctdb_public_ip_list,
1175                                        nodemap->num);
1176         if (public_ips == NULL) {
1177                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1178                 return NULL;
1179         }
1180
1181         for (j = 0; j < nodemap->num; j++) {
1182                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1183                         continue;
1184                 }
1185
1186                 /* Retrieve the list of public IPs from the
1187                  * node. Flags says whether it is known or
1188                  * available. */
1189                 ret = ctdb_ctrl_get_public_ips_flags(
1190                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1191                         public_ip_flags, &ip_list);
1192                 if (ret != 0) {
1193                         DEBUG(DEBUG_ERR,
1194                               ("Failed to read public IPs from node: %u\n", j));
1195                         talloc_free(public_ips);
1196                         return NULL;
1197                 }
1198                 public_ips[j].num = ip_list->num;
1199                 if (ip_list->num == 0) {
1200                         talloc_free(ip_list);
1201                         continue;
1202                 }
1203                 public_ips[j].ip = talloc_zero_array(public_ips,
1204                                                      struct ctdb_public_ip,
1205                                                      ip_list->num);
1206                 if (public_ips[j].ip == NULL) {
1207                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1208                         talloc_free(public_ips);
1209                         return NULL;
1210                 }
1211                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1212                        sizeof(struct ctdb_public_ip) * ip_list->num);
1213                 talloc_free(ip_list);
1214         }
1215
1216         return public_ips;
1217 }
1218
/*
 * Data shared between get_tunable_from_nodes() and its success and
 * failure callbacks.
 */
struct get_tunable_callback_data {
        /* name of the tunable being fetched, used in log messages */
        const char *tunable;
        /* talloc array of values, written at index pnn by the callback */
        uint32_t *out;
        /* set by the callbacks when the failure should discard the result */
        bool fatal;
};
1224
1225 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1226                                  int32_t res, TDB_DATA outdata,
1227                                  void *callback)
1228 {
1229         struct get_tunable_callback_data *cd =
1230                 (struct get_tunable_callback_data *)callback;
1231         int size;
1232
1233         if (res != 0) {
1234                 /* Already handled in fail callback */
1235                 return;
1236         }
1237
1238         if (outdata.dsize != sizeof(uint32_t)) {
1239                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1240                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1241                                  (int)outdata.dsize));
1242                 cd->fatal = true;
1243                 return;
1244         }
1245
1246         size = talloc_array_length(cd->out);
1247         if (pnn >= size) {
1248                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1249                                  cd->tunable, pnn, size));
1250                 return;
1251         }
1252
1253                 
1254         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1255 }
1256
1257 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1258                                        int32_t res, TDB_DATA outdata,
1259                                        void *callback)
1260 {
1261         struct get_tunable_callback_data *cd =
1262                 (struct get_tunable_callback_data *)callback;
1263
1264         switch (res) {
1265         case -ETIME:
1266                 DEBUG(DEBUG_ERR,
1267                       ("Timed out getting tunable \"%s\" from node %d\n",
1268                        cd->tunable, pnn));
1269                 cd->fatal = true;
1270                 break;
1271         case -EINVAL:
1272         case -1:
1273                 DEBUG(DEBUG_WARNING,
1274                       ("Tunable \"%s\" not implemented on node %d\n",
1275                        cd->tunable, pnn));
1276                 break;
1277         default:
1278                 DEBUG(DEBUG_ERR,
1279                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1280                        cd->tunable, pnn));
1281                 cd->fatal = true;
1282         }
1283 }
1284
1285 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1286                                         TALLOC_CTX *tmp_ctx,
1287                                         struct ctdb_node_map_old *nodemap,
1288                                         const char *tunable,
1289                                         uint32_t default_value)
1290 {
1291         TDB_DATA data;
1292         struct ctdb_control_get_tunable *t;
1293         uint32_t *nodes;
1294         uint32_t *tvals;
1295         struct get_tunable_callback_data callback_data;
1296         int i;
1297
1298         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1299         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1300         for (i=0; i<nodemap->num; i++) {
1301                 tvals[i] = default_value;
1302         }
1303                 
1304         callback_data.out = tvals;
1305         callback_data.tunable = tunable;
1306         callback_data.fatal = false;
1307
1308         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1309         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1310         t = (struct ctdb_control_get_tunable *)data.dptr;
1311         t->length = strlen(tunable)+1;
1312         memcpy(t->name, tunable, t->length);
1313         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1314         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1315                                       nodes, 0, TAKEOVER_TIMEOUT(),
1316                                       false, data,
1317                                       get_tunable_callback,
1318                                       get_tunable_fail_callback,
1319                                       &callback_data) != 0) {
1320                 if (callback_data.fatal) {
1321                         talloc_free(tvals);
1322                         tvals = NULL;
1323                 }
1324         }
1325         talloc_free(nodes);
1326         talloc_free(data.dptr);
1327
1328         return tvals;
1329 }
1330
1331 static struct ctdb_node_map *
1332 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1333                          const struct ctdb_node_map_old *old)
1334 {
1335         struct ctdb_node_map *new;
1336
1337         new = talloc(mem_ctx, struct ctdb_node_map);
1338         if (new == NULL) {
1339                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1340                 return NULL;
1341         }
1342         new->num = old->num;
1343         new->node = talloc_zero_array(new,
1344                                       struct ctdb_node_and_flags, new->num);
1345         memcpy(new->node, &old->nodes[0],
1346                sizeof(struct ctdb_node_and_flags) * new->num);
1347
1348         return new;
1349 }
1350
1351
/*
 * Set the node flags in ipalloc_state from the node map and from the
 * NoIPTakeover and NoIPHostOnAllDisabled tunables fetched from the
 * nodes (default 0 for both).
 *
 * Temporary data is allocated off ipalloc_state and freed again on
 * success; on failure it is left for the owner of ipalloc_state to
 * free.  Returns false if a tunable can not be fetched or memory
 * runs out.
 */
static bool set_ipflags(struct ctdb_context *ctdb,
                        struct ipalloc_state *ipalloc_state,
                        struct ctdb_node_map_old *nodemap)
{
        uint32_t *noiptakeover;
        uint32_t *noiphostonalldisabled;
        struct ctdb_node_map *node_map;

        noiptakeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
                                              "NoIPTakeover", 0);
        if (noiptakeover == NULL) {
                return false;
        }

        noiphostonalldisabled = get_tunable_from_nodes(
                ctdb, ipalloc_state, nodemap, "NoIPHostOnAllDisabled", 0);
        if (noiphostonalldisabled == NULL) {
                /* Caller frees tmp_ctx */
                return false;
        }

        /* ipalloc_set_node_flags() takes the new-style node map */
        node_map = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
        if (node_map == NULL) {
                return false;
        }

        ipalloc_set_node_flags(ipalloc_state, node_map,
                               noiptakeover, noiphostonalldisabled);

        talloc_free(noiptakeover);
        talloc_free(noiphostonalldisabled);
        talloc_free(node_map);

        return true;
}
1389
1390 static enum ipalloc_algorithm
1391 determine_algorithm(const struct ctdb_tunable_list *tunables)
1392 {
1393         if (1 == tunables->lcp2_public_ip_assignment) {
1394                 return IPALLOC_LCP2;
1395         } else if (1 == tunables->deterministic_public_ips) {
1396                 return IPALLOC_DETERMINISTIC;
1397         } else {
1398                 return IPALLOC_NONDETERMINISTIC;
1399         }
1400 }
1401
/* Failure accounting for the nodes taking part in a takeover run */
struct takeover_callback_data {
        /* number of entries in fail_count */
        uint32_t num_nodes;
        /* failures per node, indexed by PNN */
        unsigned int *fail_count;
};
1406
1407 static struct takeover_callback_data *
1408 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1409                             uint32_t num_nodes)
1410 {
1411         static struct takeover_callback_data *takeover_data;
1412
1413         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1414         if (takeover_data == NULL) {
1415                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1416                 return NULL;
1417         }
1418
1419         takeover_data->fail_count = talloc_zero_array(takeover_data,
1420                                                       unsigned int, num_nodes);
1421         if (takeover_data->fail_count == NULL) {
1422                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1423                 talloc_free(takeover_data);
1424                 return NULL;
1425         }
1426
1427         takeover_data->num_nodes = num_nodes;
1428
1429         return takeover_data;
1430 }
1431
1432 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1433                                        uint32_t node_pnn, int32_t res,
1434                                        TDB_DATA outdata, void *callback_data)
1435 {
1436         struct takeover_callback_data *cd =
1437                 talloc_get_type_abort(callback_data,
1438                                       struct takeover_callback_data);
1439
1440         if (node_pnn >= cd->num_nodes) {
1441                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1442                 return;
1443         }
1444
1445         if (cd->fail_count[node_pnn] == 0) {
1446                 DEBUG(DEBUG_ERR,
1447                       ("Node %u failed the takeover run\n", node_pnn));
1448         }
1449
1450         cd->fail_count[node_pnn]++;
1451 }
1452
1453 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1454                                           struct takeover_callback_data *tcd)
1455 {
1456         unsigned int max_fails = 0;
1457         uint32_t max_pnn = -1;
1458         uint32_t i;
1459
1460         for (i = 0; i < tcd->num_nodes; i++) {
1461                 if (tcd->fail_count[i] > max_fails) {
1462                         max_pnn = i;
1463                         max_fails = tcd->fail_count[i];
1464                 }
1465         }
1466
1467         if (max_fails > 0) {
1468                 int ret;
1469                 TDB_DATA data;
1470
1471                 DEBUG(DEBUG_ERR,
1472                       ("Sending banning credits to %u with fail count %u\n",
1473                        max_pnn, max_fails));
1474
1475                 data.dptr = (uint8_t *)&max_pnn;
1476                 data.dsize = sizeof(uint32_t);
1477                 ret = ctdb_client_send_message(ctdb,
1478                                                CTDB_BROADCAST_CONNECTED,
1479                                                CTDB_SRVID_BANNING,
1480                                                data);
1481                 if (ret != 0) {
1482                         DEBUG(DEBUG_ERR,
1483                               ("Failed to set banning credits for node %u\n",
1484                                max_pnn));
1485                 }
1486         }
1487 }
1488
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + whether IP rebalancing ("failback") should be done (this uses a
 *     cluster-wide configuration variable and only the value from the
 *     master node is used); and
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map and tunables
 *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
 *   (tunable fetching done separately so values can be faked in unit
 *   testing)
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then early exit
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKE_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
 */
1515 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1516                       uint32_t *force_rebalance_nodes)
1517 {
1518         int i, ret;
1519         struct ctdb_public_ip ip;
1520         uint32_t *nodes;
1521         struct public_ip_list *all_ips, *tmp_ip;
1522         TDB_DATA data;
1523         struct timeval timeout;
1524         struct client_async_data *async_data;
1525         struct ctdb_client_control_state *state;
1526         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1527         struct ipalloc_state *ipalloc_state;
1528         struct ctdb_public_ip_list *known_ips, *available_ips;
1529         struct takeover_callback_data *takeover_data;
1530
1531         /* Initialise fail callback data to be used with
1532          * takeover_run_fail_callback().  A failure in any of the
1533          * following steps will cause an early return, so this can be
1534          * reused for each of those steps without re-initialising. */
1535         takeover_data = takeover_callback_data_init(tmp_ctx,
1536                                                     nodemap->num);
1537         if (takeover_data == NULL) {
1538                 talloc_free(tmp_ctx);
1539                 return -1;
1540         }
1541
1542         /* Default timeout for early jump to IPREALLOCATED.  See below
1543          * for explanation of 3 times... */
1544         timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1545
1546         /*
1547          * ip failover is completely disabled, just send out the 
1548          * ipreallocated event.
1549          */
1550         if (ctdb->tunable.disable_ip_failover != 0) {
1551                 goto ipreallocated;
1552         }
1553
1554         ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1555                                            determine_algorithm(&ctdb->tunable),
1556                                            (ctdb->tunable.no_ip_failback != 0),
1557                                            force_rebalance_nodes);
1558         if (ipalloc_state == NULL) {
1559                 talloc_free(tmp_ctx);
1560                 return -1;
1561         }
1562
1563         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1564                 DEBUG(DEBUG_ERR,
1565                       ("Failed to set IP flags - aborting takeover run\n"));
1566                 talloc_free(tmp_ctx);
1567                 return -1;
1568         }
1569
1570         /* Fetch known/available public IPs from each active node */
1571         /* Fetch lists of known public IPs from all nodes */
1572         known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1573                                                  nodemap, 0);
1574         if (known_ips == NULL) {
1575                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1576                 talloc_free(tmp_ctx);
1577                 return -1;
1578         }
1579         available_ips = ctdb_fetch_remote_public_ips(
1580                 ctdb, ipalloc_state, nodemap,
1581                 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1582         if (available_ips == NULL) {
1583                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1584                 talloc_free(tmp_ctx);
1585                 return -1;
1586         }
1587
1588         if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1589                 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1590                 talloc_free(tmp_ctx);
1591                 return -1;
1592         }
1593
1594         if (! ipalloc_can_host_ips(ipalloc_state)) {
1595                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1596                 goto ipreallocated;
1597         }
1598
1599         /* Do the IP reassignment calculations */
1600         all_ips = ipalloc(ipalloc_state);
1601         if (all_ips == NULL) {
1602                 talloc_free(tmp_ctx);
1603                 return -1;
1604         }
1605
1606         /* Now tell all nodes to release any public IPs should not
1607          * host.  This will be a NOOP on nodes that don't currently
1608          * hold the given IP.
1609          */
1610         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1611         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1612
1613         async_data->fail_callback = takeover_run_fail_callback;
1614         async_data->callback_data = takeover_data;
1615
1616         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1617
1618         /* Each of the following stages (RELEASE_IP, TAKEOVER_IP,
1619          * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1620          * seconds.  However, RELEASE_IP can take longer due to TCP
1621          * connection killing, so sometimes needs more time.
1622          * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1623          * seconds across all 3 stages.  No explicit expiry checks are
1624          * needed before each stage because tevent is smart enough to
1625          * fire the timeouts even if they are in the past.  Initialise
1626          * this here so it explicitly covers the stages we're
1627          * interested in but, in particular, not the time taken by the
1628          * ipalloc().
1629          */
1630         timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout, 0);
1631
1632         /* Send a RELEASE_IP to all nodes that should not be hosting
1633          * each IP.  For each IP, all but one of these will be
1634          * redundant.  However, the redundant ones are used to tell
1635          * nodes which node should be hosting the IP so that commands
1636          * like "ctdb ip" can display a particular nodes idea of who
1637          * is hosting what. */
1638         for (i=0;i<nodemap->num;i++) {
1639                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1640                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1641                         continue;
1642                 }
1643
1644                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1645                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1646                                 /* This node should be serving this
1647                                    vnn so don't tell it to release the ip
1648                                 */
1649                                 continue;
1650                         }
1651                         ip.pnn  = tmp_ip->pnn;
1652                         ip.addr = tmp_ip->addr;
1653
1654                         data.dsize = sizeof(ip);
1655                         data.dptr  = (uint8_t *)&ip;
1656                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1657                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1658                                                   data, async_data,
1659                                                   &timeout, NULL);
1660                         if (state == NULL) {
1661                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1662                                 talloc_free(tmp_ctx);
1663                                 return -1;
1664                         }
1665
1666                         ctdb_client_async_add(async_data, state);
1667                 }
1668         }
1669         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1670                 DEBUG(DEBUG_ERR,
1671                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1672                 goto fail;
1673         }
1674         talloc_free(async_data);
1675
1676
1677         /* For each IP, send a TAKOVER_IP to the node that should be
1678          * hosting it.  Many of these will often be redundant (since
1679          * the allocation won't have changed) but they can be useful
1680          * to recover from inconsistencies. */
1681         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1682         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1683
1684         async_data->fail_callback = takeover_run_fail_callback;
1685         async_data->callback_data = takeover_data;
1686
1687         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1688                 if (tmp_ip->pnn == -1) {
1689                         /* this IP won't be taken over */
1690                         continue;
1691                 }
1692
1693                 ip.pnn  = tmp_ip->pnn;
1694                 ip.addr = tmp_ip->addr;
1695
1696                 data.dsize = sizeof(ip);
1697                 data.dptr  = (uint8_t *)&ip;
1698                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1699                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1700                                           data, async_data, &timeout, NULL);
1701                 if (state == NULL) {
1702                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1703                         talloc_free(tmp_ctx);
1704                         return -1;
1705                 }
1706
1707                 ctdb_client_async_add(async_data, state);
1708         }
1709         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1710                 DEBUG(DEBUG_ERR,
1711                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1712                 goto fail;
1713         }
1714
1715 ipreallocated:
1716         /*
1717          * Tell all nodes to run eventscripts to process the
1718          * "ipreallocated" event.  This can do a lot of things,
1719          * including restarting services to reconfigure them if public
1720          * IPs have moved.  Once upon a time this event only used to
1721          * update natgw.
1722          */
1723         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1724         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1725                                         nodes, 0, timeout,
1726                                         false, tdb_null,
1727                                         NULL, takeover_run_fail_callback,
1728                                         takeover_data);
1729         if (ret != 0) {
1730                 DEBUG(DEBUG_ERR,
1731                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1732                 goto fail;
1733         }
1734
1735         talloc_free(tmp_ctx);
1736         return ret;
1737
1738 fail:
1739         takeover_run_process_failures(ctdb, takeover_data);
1740         talloc_free(tmp_ctx);
1741         return -1;
1742 }
1743
1744
1745 /*
1746   destroy a ctdb_client_ip structure
1747  */
1748 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1749 {
1750         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1751                 ctdb_addr_to_str(&ip->addr),
1752                 ntohs(ip->addr.ip.sin_port),
1753                 ip->client_id));
1754
1755         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1756         return 0;
1757 }
1758
1759 /*
1760   called by a client to inform us of a TCP connection that it is managing
1761   that should tickled with an ACK when IP takeover is done
1762  */
1763 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1764                                 TDB_DATA indata)
1765 {
1766         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1767         struct ctdb_connection *tcp_sock = NULL;
1768         struct ctdb_tcp_list *tcp;
1769         struct ctdb_connection t;
1770         int ret;
1771         TDB_DATA data;
1772         struct ctdb_client_ip *ip;
1773         struct ctdb_vnn *vnn;
1774         ctdb_sock_addr addr;
1775
1776         /* If we don't have public IPs, tickles are useless */
1777         if (ctdb->vnn == NULL) {
1778                 return 0;
1779         }
1780
1781         tcp_sock = (struct ctdb_connection *)indata.dptr;
1782
1783         addr = tcp_sock->src;
1784         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1785         addr = tcp_sock->dst;
1786         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1787
1788         ZERO_STRUCT(addr);
1789         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1790         vnn = find_public_ip_vnn(ctdb, &addr);
1791         if (vnn == NULL) {
1792                 switch (addr.sa.sa_family) {
1793                 case AF_INET:
1794                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1795                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1796                                         ctdb_addr_to_str(&addr)));
1797                         }
1798                         break;
1799                 case AF_INET6:
1800                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1801                                 ctdb_addr_to_str(&addr)));
1802                         break;
1803                 default:
1804                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1805                 }
1806
1807                 return 0;
1808         }
1809
1810         if (vnn->pnn != ctdb->pnn) {
1811                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1812                         ctdb_addr_to_str(&addr),
1813                         client_id, client->pid));
1814                 /* failing this call will tell smbd to die */
1815                 return -1;
1816         }
1817
1818         ip = talloc(client, struct ctdb_client_ip);
1819         CTDB_NO_MEMORY(ctdb, ip);
1820
1821         ip->ctdb      = ctdb;
1822         ip->addr      = addr;
1823         ip->client_id = client_id;
1824         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1825         DLIST_ADD(ctdb->client_ip_list, ip);
1826
1827         tcp = talloc(client, struct ctdb_tcp_list);
1828         CTDB_NO_MEMORY(ctdb, tcp);
1829
1830         tcp->connection.src = tcp_sock->src;
1831         tcp->connection.dst = tcp_sock->dst;
1832
1833         DLIST_ADD(client->tcp_list, tcp);
1834
1835         t.src = tcp_sock->src;
1836         t.dst = tcp_sock->dst;
1837
1838         data.dptr = (uint8_t *)&t;
1839         data.dsize = sizeof(t);
1840
1841         switch (addr.sa.sa_family) {
1842         case AF_INET:
1843                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1844                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1845                         ctdb_addr_to_str(&tcp_sock->src),
1846                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1847                 break;
1848         case AF_INET6:
1849                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1850                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1851                         ctdb_addr_to_str(&tcp_sock->src),
1852                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1853                 break;
1854         default:
1855                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1856         }
1857
1858
1859         /* tell all nodes about this tcp connection */
1860         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1861                                        CTDB_CONTROL_TCP_ADD,
1862                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1863         if (ret != 0) {
1864                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1865                 return -1;
1866         }
1867
1868         return 0;
1869 }
1870
1871 /*
1872   find a tcp address on a list
1873  */
1874 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1875                                            struct ctdb_connection *tcp)
1876 {
1877         int i;
1878
1879         if (array == NULL) {
1880                 return NULL;
1881         }
1882
1883         for (i=0;i<array->num;i++) {
1884                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1885                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1886                         return &array->connections[i];
1887                 }
1888         }
1889         return NULL;
1890 }
1891
1892
1893
1894 /*
1895   called by a daemon to inform us of a TCP connection that one of its
1896   clients managing that should tickled with an ACK when IP takeover is
1897   done
1898  */
1899 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1900 {
1901         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1902         struct ctdb_tcp_array *tcparray;
1903         struct ctdb_connection tcp;
1904         struct ctdb_vnn *vnn;
1905
1906         /* If we don't have public IPs, tickles are useless */
1907         if (ctdb->vnn == NULL) {
1908                 return 0;
1909         }
1910
1911         vnn = find_public_ip_vnn(ctdb, &p->dst);
1912         if (vnn == NULL) {
1913                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1914                         ctdb_addr_to_str(&p->dst)));
1915
1916                 return -1;
1917         }
1918
1919
1920         tcparray = vnn->tcp_array;
1921
1922         /* If this is the first tickle */
1923         if (tcparray == NULL) {
1924                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1925                 CTDB_NO_MEMORY(ctdb, tcparray);
1926                 vnn->tcp_array = tcparray;
1927
1928                 tcparray->num = 0;
1929                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1930                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1931
1932                 tcparray->connections[tcparray->num].src = p->src;
1933                 tcparray->connections[tcparray->num].dst = p->dst;
1934                 tcparray->num++;
1935
1936                 if (tcp_update_needed) {
1937                         vnn->tcp_update_needed = true;
1938                 }
1939                 return 0;
1940         }
1941
1942
1943         /* Do we already have this tickle ?*/
1944         tcp.src = p->src;
1945         tcp.dst = p->dst;
1946         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1947                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1948                         ctdb_addr_to_str(&tcp.dst),
1949                         ntohs(tcp.dst.ip.sin_port),
1950                         vnn->pnn));
1951                 return 0;
1952         }
1953
1954         /* A new tickle, we must add it to the array */
1955         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1956                                         struct ctdb_connection,
1957                                         tcparray->num+1);
1958         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1959
1960         tcparray->connections[tcparray->num].src = p->src;
1961         tcparray->connections[tcparray->num].dst = p->dst;
1962         tcparray->num++;
1963
1964         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1965                 ctdb_addr_to_str(&tcp.dst),
1966                 ntohs(tcp.dst.ip.sin_port),
1967                 vnn->pnn));
1968
1969         if (tcp_update_needed) {
1970                 vnn->tcp_update_needed = true;
1971         }
1972
1973         return 0;
1974 }
1975
1976
1977 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1978 {
1979         struct ctdb_connection *tcpp;
1980
1981         if (vnn == NULL) {
1982                 return;
1983         }
1984
1985         /* if the array is empty we cant remove it
1986            and we don't need to do anything
1987          */
1988         if (vnn->tcp_array == NULL) {
1989                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1990                         ctdb_addr_to_str(&conn->dst),
1991                         ntohs(conn->dst.ip.sin_port)));
1992                 return;
1993         }
1994
1995
1996         /* See if we know this connection
1997            if we don't know this connection  then we dont need to do anything
1998          */
1999         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
2000         if (tcpp == NULL) {
2001                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
2002                         ctdb_addr_to_str(&conn->dst),
2003                         ntohs(conn->dst.ip.sin_port)));
2004                 return;
2005         }
2006
2007
2008         /* We need to remove this entry from the array.
2009            Instead of allocating a new array and copying data to it
2010            we cheat and just copy the last entry in the existing array
2011            to the entry that is to be removed and just shring the 
2012            ->num field
2013          */
2014         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2015         vnn->tcp_array->num--;
2016
2017         /* If we deleted the last entry we also need to remove the entire array
2018          */
2019         if (vnn->tcp_array->num == 0) {
2020                 talloc_free(vnn->tcp_array);
2021                 vnn->tcp_array = NULL;
2022         }               
2023
2024         vnn->tcp_update_needed = true;
2025
2026         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2027                 ctdb_addr_to_str(&conn->src),
2028                 ntohs(conn->src.ip.sin_port)));
2029 }
2030
2031
2032 /*
2033   called by a daemon to inform us of a TCP connection that one of its
2034   clients used are no longer needed in the tickle database
2035  */
2036 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2037 {
2038         struct ctdb_vnn *vnn;
2039         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2040
2041         /* If we don't have public IPs, tickles are useless */
2042         if (ctdb->vnn == NULL) {
2043                 return 0;
2044         }
2045
2046         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2047         if (vnn == NULL) {
2048                 DEBUG(DEBUG_ERR,
2049                       (__location__ " unable to find public address %s\n",
2050                        ctdb_addr_to_str(&conn->dst)));
2051                 return 0;
2052         }
2053
2054         ctdb_remove_connection(vnn, conn);
2055
2056         return 0;
2057 }
2058
2059
2060 /*
2061   Called when another daemon starts - causes all tickles for all
2062   public addresses we are serving to be sent to the new node on the
2063   next check.  This actually causes the next scheduled call to
2064   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2065   doesn't require careful error handling.
2066  */
2067 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2068 {
2069         struct ctdb_vnn *vnn;
2070
2071         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2072                            (unsigned long) pnn));
2073
2074         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2075                 vnn->tcp_update_needed = true;
2076         }
2077
2078         return 0;
2079 }
2080
2081
2082 /*
2083   called when a client structure goes away - hook to remove
2084   elements from the tcp_list in all daemons
2085  */
2086 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2087 {
2088         while (client->tcp_list) {
2089                 struct ctdb_vnn *vnn;
2090                 struct ctdb_tcp_list *tcp = client->tcp_list;
2091                 struct ctdb_connection *conn = &tcp->connection;
2092
2093                 DLIST_REMOVE(client->tcp_list, tcp);
2094
2095                 vnn = find_public_ip_vnn(client->ctdb,
2096                                          &conn->dst);
2097                 if (vnn == NULL) {
2098                         DEBUG(DEBUG_ERR,
2099                               (__location__ " unable to find public address %s\n",
2100                                ctdb_addr_to_str(&conn->dst)));
2101                         continue;
2102                 }
2103
2104                 /* If the IP address is hosted on this node then
2105                  * remove the connection. */
2106                 if (vnn->pnn == client->ctdb->pnn) {
2107                         ctdb_remove_connection(vnn, conn);
2108                 }
2109
2110                 /* Otherwise this function has been called because the
2111                  * server IP address has been released to another node
2112                  * and the client has exited.  This means that we
2113                  * should not delete the connection information.  The
2114                  * takeover node processes connections too. */
2115         }
2116 }
2117
2118
2119 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2120 {
2121         struct ctdb_vnn *vnn, *next;
2122         int count = 0;
2123
2124         if (ctdb->tunable.disable_ip_failover == 1) {
2125                 return;
2126         }
2127
2128         for (vnn = ctdb->vnn; vnn != NULL; vnn = next) {
2129                 /* vnn can be freed below in release_ip_post() */
2130                 next = vnn->next;
2131
2132                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2133                         ctdb_vnn_unassign_iface(ctdb, vnn);
2134                         continue;
2135                 }
2136
2137                 /* Don't allow multiple releases at once.  Some code,
2138                  * particularly ctdb_tickle_sentenced_connections() is
2139                  * not re-entrant */
2140                 if (vnn->update_in_flight) {
2141                         DEBUG(DEBUG_WARNING,
2142                               (__location__
2143                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2144                                     ctdb_addr_to_str(&vnn->public_address),
2145                                     vnn->public_netmask_bits,
2146                                     ctdb_vnn_iface_string(vnn)));
2147                         continue;
2148                 }
2149                 vnn->update_in_flight = true;
2150
2151                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2152                                     ctdb_addr_to_str(&vnn->public_address),
2153                                     vnn->public_netmask_bits,
2154                                     ctdb_vnn_iface_string(vnn)));
2155
2156                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2157                                        ctdb_vnn_iface_string(vnn),
2158                                        ctdb_addr_to_str(&vnn->public_address),
2159                                        vnn->public_netmask_bits);
2160                 /* releaseip timeouts are converted to success, so to
2161                  * detect failures just check if the IP address is
2162                  * still there...
2163                  */
2164                 if (ctdb_sys_have_ip(&vnn->public_address)) {
2165                         DEBUG(DEBUG_ERR,
2166                               (__location__
2167                                " IP address %s not released\n",
2168                                ctdb_addr_to_str(&vnn->public_address)));
2169                         vnn->update_in_flight = false;
2170                         continue;
2171                 }
2172
2173                 vnn = release_ip_post(ctdb, vnn, &vnn->public_address);
2174                 if (vnn != NULL) {
2175                         vnn->update_in_flight = false;
2176                 }
2177                 count++;
2178         }
2179
2180         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2181 }
2182
2183
2184 /*
2185   get list of public IPs
2186  */
2187 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2188                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2189 {
2190         int i, num, len;
2191         struct ctdb_public_ip_list_old *ips;
2192         struct ctdb_vnn *vnn;
2193         bool only_available = false;
2194
2195         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2196                 only_available = true;
2197         }
2198
2199         /* count how many public ip structures we have */
2200         num = 0;
2201         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2202                 num++;
2203         }
2204
2205         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2206                 num*sizeof(struct ctdb_public_ip);
2207         ips = talloc_zero_size(outdata, len);
2208         CTDB_NO_MEMORY(ctdb, ips);
2209
2210         i = 0;
2211         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2212                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2213                         continue;
2214                 }
2215                 ips->ips[i].pnn  = vnn->pnn;
2216                 ips->ips[i].addr = vnn->public_address;
2217                 i++;
2218         }
2219         ips->num = i;
2220         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2221                 i*sizeof(struct ctdb_public_ip);
2222
2223         outdata->dsize = len;
2224         outdata->dptr  = (uint8_t *)ips;
2225
2226         return 0;
2227 }
2228
2229
2230 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2231                                         struct ctdb_req_control_old *c,
2232                                         TDB_DATA indata,
2233                                         TDB_DATA *outdata)
2234 {
2235         int i, num, len;
2236         ctdb_sock_addr *addr;
2237         struct ctdb_public_ip_info_old *info;
2238         struct ctdb_vnn *vnn;
2239
2240         addr = (ctdb_sock_addr *)indata.dptr;
2241
2242         vnn = find_public_ip_vnn(ctdb, addr);
2243         if (vnn == NULL) {
2244                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2245                                  "'%s'not a public address\n",
2246                                  ctdb_addr_to_str(addr)));
2247                 return -1;
2248         }
2249
2250         /* count how many public ip structures we have */
2251         num = 0;
2252         for (;vnn->ifaces[num];) {
2253                 num++;
2254         }
2255
2256         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2257                 num*sizeof(struct ctdb_iface);
2258         info = talloc_zero_size(outdata, len);
2259         CTDB_NO_MEMORY(ctdb, info);
2260
2261         info->ip.addr = vnn->public_address;
2262         info->ip.pnn = vnn->pnn;
2263         info->active_idx = 0xFFFFFFFF;
2264
2265         for (i=0; vnn->ifaces[i]; i++) {
2266                 struct ctdb_interface *cur;
2267
2268                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2269                 if (cur == NULL) {
2270                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2271                                            vnn->ifaces[i]));
2272                         return -1;
2273                 }
2274                 if (vnn->iface == cur) {
2275                         info->active_idx = i;
2276                 }
2277                 strncpy(info->ifaces[i].name, cur->name,
2278                         sizeof(info->ifaces[i].name));
2279                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2280                 info->ifaces[i].link_state = cur->link_up;
2281                 info->ifaces[i].references = cur->references;
2282         }
2283         info->num = i;
2284         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2285                 i*sizeof(struct ctdb_iface);
2286
2287         outdata->dsize = len;
2288         outdata->dptr  = (uint8_t *)info;
2289
2290         return 0;
2291 }
2292
2293 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2294                                 struct ctdb_req_control_old *c,
2295                                 TDB_DATA *outdata)
2296 {
2297         int i, num, len;
2298         struct ctdb_iface_list_old *ifaces;
2299         struct ctdb_interface *cur;
2300
2301         /* count how many public ip structures we have */
2302         num = 0;
2303         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2304                 num++;
2305         }
2306
2307         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2308                 num*sizeof(struct ctdb_iface);
2309         ifaces = talloc_zero_size(outdata, len);
2310         CTDB_NO_MEMORY(ctdb, ifaces);
2311
2312         i = 0;
2313         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2314                 strncpy(ifaces->ifaces[i].name, cur->name,
2315                         sizeof(ifaces->ifaces[i].name));
2316                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2317                 ifaces->ifaces[i].link_state = cur->link_up;
2318                 ifaces->ifaces[i].references = cur->references;
2319                 i++;
2320         }
2321         ifaces->num = i;
2322         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2323                 i*sizeof(struct ctdb_iface);
2324
2325         outdata->dsize = len;
2326         outdata->dptr  = (uint8_t *)ifaces;
2327
2328         return 0;
2329 }
2330
2331 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2332                                     struct ctdb_req_control_old *c,
2333                                     TDB_DATA indata)
2334 {
2335         struct ctdb_iface *info;
2336         struct ctdb_interface *iface;
2337         bool link_up = false;
2338
2339         info = (struct ctdb_iface *)indata.dptr;
2340
2341         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2342                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2343                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2344                                   len, len, info->name));
2345                 return -1;
2346         }
2347
2348         switch (info->link_state) {
2349         case 0:
2350                 link_up = false;
2351                 break;
2352         case 1:
2353                 link_up = true;
2354                 break;
2355         default:
2356                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2357                                   (unsigned int)info->link_state));
2358                 return -1;
2359         }
2360
2361         if (info->references != 0) {
2362                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2363                                   (unsigned int)info->references));
2364                 return -1;
2365         }
2366
2367         iface = ctdb_find_iface(ctdb, info->name);
2368         if (iface == NULL) {
2369                 return -1;
2370         }
2371
2372         if (link_up == iface->link_up) {
2373                 return 0;
2374         }
2375
2376         DEBUG(DEBUG_ERR,
2377               ("iface[%s] has changed it's link status %s => %s\n",
2378                iface->name,
2379                iface->link_up?"up":"down",
2380                link_up?"up":"down"));
2381
2382         iface->link_up = link_up;
2383         return 0;
2384 }
2385
2386
2387 /*
2388   called by a daemon to inform us of the entire list of TCP tickles for
2389   a particular public address.
2390   this control should only be sent by the node that is currently serving
2391   that public address.
2392  */
2393 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2394 {
2395         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2396         struct ctdb_tcp_array *tcparray;
2397         struct ctdb_vnn *vnn;
2398
2399         /* We must at least have tickles.num or else we cant verify the size
2400            of the received data blob
2401          */
2402         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2403                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2404                 return -1;
2405         }
2406
2407         /* verify that the size of data matches what we expect */
2408         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2409                          + sizeof(struct ctdb_connection) * list->num) {
2410                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2411                 return -1;
2412         }
2413
2414         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2415                            ctdb_addr_to_str(&list->addr)));
2416
2417         vnn = find_public_ip_vnn(ctdb, &list->addr);
2418         if (vnn == NULL) {
2419                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2420                         ctdb_addr_to_str(&list->addr)));
2421
2422                 return 1;
2423         }
2424
2425         if (vnn->pnn == ctdb->pnn) {
2426                 DEBUG(DEBUG_INFO,
2427                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2428                        ctdb_addr_to_str(&list->addr)));
2429                 return 0;
2430         }
2431
2432         /* remove any old ticklelist we might have */
2433         talloc_free(vnn->tcp_array);
2434         vnn->tcp_array = NULL;
2435
2436         tcparray = talloc(vnn, struct ctdb_tcp_array);
2437         CTDB_NO_MEMORY(ctdb, tcparray);
2438
2439         tcparray->num = list->num;
2440
2441         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2442         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2443
2444         memcpy(tcparray->connections, &list->connections[0],
2445                sizeof(struct ctdb_connection)*tcparray->num);
2446
2447         /* We now have a new fresh tickle list array for this vnn */
2448         vnn->tcp_array = tcparray;
2449
2450         return 0;
2451 }
2452
2453 /*
2454   called to return the full list of tickles for the puclic address associated 
2455   with the provided vnn
2456  */
2457 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2458 {
2459         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2460         struct ctdb_tickle_list_old *list;
2461         struct ctdb_tcp_array *tcparray;
2462         int num, i;
2463         struct ctdb_vnn *vnn;
2464         unsigned port;
2465
2466         vnn = find_public_ip_vnn(ctdb, addr);
2467         if (vnn == NULL) {
2468                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2469                         ctdb_addr_to_str(addr)));
2470
2471                 return 1;
2472         }
2473
2474         port = ctdb_addr_to_port(addr);
2475
2476         tcparray = vnn->tcp_array;
2477         num = 0;
2478         if (tcparray != NULL) {
2479                 if (port == 0) {
2480                         /* All connections */
2481                         num = tcparray->num;
2482                 } else {
2483                         /* Count connections for port */
2484                         for (i = 0; i < tcparray->num; i++) {
2485                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2486                                         num++;
2487                                 }
2488                         }
2489                 }
2490         }
2491
2492         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2493                         + sizeof(struct ctdb_connection) * num;
2494
2495         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2496         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2497         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2498
2499         list->addr = *addr;
2500         list->num = num;
2501
2502         if (num == 0) {
2503                 return 0;
2504         }
2505
2506         num = 0;
2507         for (i = 0; i < tcparray->num; i++) {
2508                 if (port == 0 || \
2509                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2510                         list->connections[num] = tcparray->connections[i];
2511                         num++;
2512                 }
2513         }
2514
2515         return 0;
2516 }
2517
2518
2519 /*
2520   set the list of all tcp tickles for a public address
2521  */
2522 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2523                                             ctdb_sock_addr *addr,
2524                                             struct ctdb_tcp_array *tcparray)
2525 {
2526         int ret, num;
2527         TDB_DATA data;
2528         struct ctdb_tickle_list_old *list;
2529
2530         if (tcparray) {
2531                 num = tcparray->num;
2532         } else {
2533                 num = 0;
2534         }
2535
2536         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2537                         sizeof(struct ctdb_connection) * num;
2538         data.dptr = talloc_size(ctdb, data.dsize);
2539         CTDB_NO_MEMORY(ctdb, data.dptr);
2540
2541         list = (struct ctdb_tickle_list_old *)data.dptr;
2542         list->addr = *addr;
2543         list->num = num;
2544         if (tcparray) {
2545                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2546         }
2547
2548         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2549                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2550                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2551         if (ret != 0) {
2552                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2553                 return -1;
2554         }
2555
2556         talloc_free(data.dptr);
2557
2558         return ret;
2559 }
2560
2561
2562 /*
2563   perform tickle updates if required
2564  */
2565 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2566                                     struct tevent_timer *te,
2567                                     struct timeval t, void *private_data)
2568 {
2569         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2570         int ret;
2571         struct ctdb_vnn *vnn;
2572
2573         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2574                 /* we only send out updates for public addresses that 
2575                    we have taken over
2576                  */
2577                 if (ctdb->pnn != vnn->pnn) {
2578                         continue;
2579                 }
2580                 /* We only send out the updates if we need to */
2581                 if (!vnn->tcp_update_needed) {
2582                         continue;
2583                 }
2584                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2585                                                        &vnn->public_address,
2586                                                        vnn->tcp_array);
2587                 if (ret != 0) {
2588                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2589                                 ctdb_addr_to_str(&vnn->public_address)));
2590                 } else {
2591                         DEBUG(DEBUG_INFO,
2592                               ("Sent tickle update for public address %s\n",
2593                                ctdb_addr_to_str(&vnn->public_address)));
2594                         vnn->tcp_update_needed = false;
2595                 }
2596         }
2597
2598         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2599                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2600                          ctdb_update_tcp_tickles, ctdb);
2601 }
2602
2603 /*
2604   start periodic update of tcp tickles
2605  */
2606 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2607 {
2608         ctdb->tickle_update_context = talloc_new(ctdb);
2609
2610         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2611                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2612                          ctdb_update_tcp_tickles, ctdb);
2613 }
2614
2615
2616
2617
2618 struct control_gratious_arp {
2619         struct ctdb_context *ctdb;
2620         ctdb_sock_addr addr;
2621         const char *iface;
2622         int count;
2623 };
2624
2625 /*
2626   send a control_gratuitous arp
2627  */
2628 static void send_gratious_arp(struct tevent_context *ev,
2629                               struct tevent_timer *te,
2630                               struct timeval t, void *private_data)
2631 {
2632         int ret;
2633         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2634                                                         struct control_gratious_arp);
2635
2636         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2637         if (ret != 0) {
2638                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2639                                  arp->iface, strerror(errno)));
2640         }
2641
2642
2643         arp->count++;
2644         if (arp->count == CTDB_ARP_REPEAT) {
2645                 talloc_free(arp);
2646                 return;
2647         }
2648
2649         tevent_add_timer(arp->ctdb->ev, arp,
2650                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2651                          send_gratious_arp, arp);
2652 }
2653
2654
2655 /*
2656   send a gratious arp 
2657  */
2658 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2659 {
2660         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2661         struct control_gratious_arp *arp;
2662
2663         /* verify the size of indata */
2664         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2665                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2666                                  (unsigned)indata.dsize, 
2667                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2668                 return -1;
2669         }
2670         if (indata.dsize != 
2671                 ( offsetof(struct ctdb_addr_info_old, iface)
2672                 + gratious_arp->len ) ){
2673
2674                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2675                         "but should be %u bytes\n", 
2676                          (unsigned)indata.dsize, 
2677                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2678                 return -1;
2679         }
2680
2681
2682         arp = talloc(ctdb, struct control_gratious_arp);
2683         CTDB_NO_MEMORY(ctdb, arp);
2684
2685         arp->ctdb  = ctdb;
2686         arp->addr   = gratious_arp->addr;
2687         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2688         CTDB_NO_MEMORY(ctdb, arp->iface);
2689         arp->count = 0;
2690
2691         tevent_add_timer(arp->ctdb->ev, arp,
2692                          timeval_zero(), send_gratious_arp, arp);
2693
2694         return 0;
2695 }
2696
2697 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2698 {
2699         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2700         int ret;
2701
2702         /* verify the size of indata */
2703         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2704                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2705                 return -1;
2706         }
2707         if (indata.dsize != 
2708                 ( offsetof(struct ctdb_addr_info_old, iface)
2709                 + pub->len ) ){
2710
2711                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2712                         "but should be %u bytes\n", 
2713                          (unsigned)indata.dsize, 
2714                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2715                 return -1;
2716         }
2717
2718         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2719
2720         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2721
2722         if (ret != 0) {
2723                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2724                 return -1;
2725         }
2726
2727         return 0;
2728 }
2729
2730 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2731 {
2732         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2733         struct ctdb_vnn *vnn;
2734
2735         /* verify the size of indata */
2736         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2737                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2738                 return -1;
2739         }
2740         if (indata.dsize != 
2741                 ( offsetof(struct ctdb_addr_info_old, iface)
2742                 + pub->len ) ){
2743
2744                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2745                         "but should be %u bytes\n", 
2746                          (unsigned)indata.dsize, 
2747                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2748                 return -1;
2749         }
2750
2751         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2752
2753         /* walk over all public addresses until we find a match */
2754         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2755                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2756                         if (vnn->pnn == ctdb->pnn) {
2757                                 /* This IP is currently being hosted.
2758                                  * Defer the deletion until the next
2759                                  * takeover run. "ctdb reloadips" will
2760                                  * always cause a takeover run.  "ctdb
2761                                  * delip" will now need an explicit
2762                                  * "ctdb ipreallocated" afterwards. */
2763                                 vnn->delete_pending = true;
2764                         } else {
2765                                 /* This IP is not hosted on the
2766                                  * current node so just delete it
2767                                  * now. */
2768                                 do_delete_ip(ctdb, vnn);
2769                         }
2770
2771                         return 0;
2772                 }
2773         }
2774
2775         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2776                          ctdb_addr_to_str(&pub->addr)));
2777         return -1;
2778 }
2779
2780
/* state carried from the ipreallocated control to its event callback */
struct ipreallocated_callback_state {
        /* the control request that is answered once the event has run */
        struct ctdb_req_control_old *c;
};
2784
2785 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2786                                         int status, void *p)
2787 {
2788         struct ipreallocated_callback_state *state =
2789                 talloc_get_type(p, struct ipreallocated_callback_state);
2790
2791         if (status != 0) {
2792                 DEBUG(DEBUG_ERR,
2793                       (" \"ipreallocated\" event script failed (status %d)\n",
2794                        status));
2795                 if (status == -ETIME) {
2796                         ctdb_ban_self(ctdb);
2797                 }
2798         }
2799
2800         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2801         talloc_free(state);
2802 }
2803
2804 /* A control to run the ipreallocated event */
2805 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2806                                    struct ctdb_req_control_old *c,
2807                                    bool *async_reply)
2808 {
2809         int ret;
2810         struct ipreallocated_callback_state *state;
2811
2812         state = talloc(ctdb, struct ipreallocated_callback_state);
2813         CTDB_NO_MEMORY(ctdb, state);
2814
2815         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2816
2817         ret = ctdb_event_script_callback(ctdb, state,
2818                                          ctdb_ipreallocated_callback, state,
2819                                          CTDB_EVENT_IPREALLOCATED,
2820                                          "%s", "");
2821
2822         if (ret != 0) {
2823                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2824                 talloc_free(state);
2825                 return -1;
2826         }
2827
2828         /* tell the control that we will be reply asynchronously */
2829         state->c    = talloc_steal(state, c);
2830         *async_reply = true;
2831
2832         return 0;
2833 }
2834
2835
/* state of one in-flight "reload public ips" request */
struct ctdb_reloadips_handle {
        /* daemon context */
        struct ctdb_context *ctdb;
        /* control answered from the destructor; NULL if nothing to answer */
        struct ctdb_req_control_old *c;
        /* status sent in the reply; starts at -1, set from the child's result */
        int status;
        /* pipe: fd[0] is read by the parent, fd[1] is written by the child */
        int fd[2];
        /* pid of the forked child; killed when the handle is destroyed */
        pid_t child;
        /* read event on fd[0] */
        struct tevent_fd *fde;
};
2844
2845 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2846 {
2847         if (h == h->ctdb->reload_ips) {
2848                 h->ctdb->reload_ips = NULL;
2849         }
2850         if (h->c != NULL) {
2851                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2852                 h->c = NULL;
2853         }
2854         ctdb_kill(h->ctdb, h->child, SIGKILL);
2855         return 0;
2856 }
2857
2858 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2859                                          struct tevent_timer *te,
2860                                          struct timeval t, void *private_data)
2861 {
2862         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2863
2864         talloc_free(h);
2865 }
2866
2867 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2868                                          struct tevent_fd *fde,
2869                                          uint16_t flags, void *private_data)
2870 {
2871         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2872
2873         char res;
2874         int ret;
2875
2876         ret = sys_read(h->fd[0], &res, 1);
2877         if (ret < 1 || res != 0) {
2878                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2879                 res = 1;
2880         }
2881         h->status = res;
2882
2883         talloc_free(h);
2884 }
2885
2886 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2887 {
2888         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2889         struct ctdb_public_ip_list_old *ips;
2890         struct ctdb_vnn *vnn;
2891         struct client_async_data *async_data;
2892         struct timeval timeout;
2893         TDB_DATA data;
2894         struct ctdb_client_control_state *state;
2895         bool first_add;
2896         int i, ret;
2897
2898         CTDB_NO_MEMORY(ctdb, mem_ctx);
2899
2900         /* Read IPs from local node */
2901         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2902                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2903         if (ret != 0) {
2904                 DEBUG(DEBUG_ERR,
2905                       ("Unable to fetch public IPs from local node\n"));
2906                 talloc_free(mem_ctx);
2907                 return -1;
2908         }
2909
2910         /* Read IPs file - this is safe since this is a child process */
2911         ctdb->vnn = NULL;
2912         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2913                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2914                 talloc_free(mem_ctx);
2915                 return -1;
2916         }
2917
2918         async_data = talloc_zero(mem_ctx, struct client_async_data);
2919         CTDB_NO_MEMORY(ctdb, async_data);
2920
2921         /* Compare IPs between node and file for IPs to be deleted */
2922         for (i = 0; i < ips->num; i++) {
2923                 /* */
2924                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2925                         if (ctdb_same_ip(&vnn->public_address,
2926                                          &ips->ips[i].addr)) {
2927                                 /* IP is still in file */
2928                                 break;
2929                         }
2930                 }
2931
2932                 if (vnn == NULL) {
2933                         /* Delete IP ips->ips[i] */
2934                         struct ctdb_addr_info_old *pub;
2935
2936                         DEBUG(DEBUG_NOTICE,
2937                               ("IP %s no longer configured, deleting it\n",
2938                                ctdb_addr_to_str(&ips->ips[i].addr)));
2939
2940                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2941                         CTDB_NO_MEMORY(ctdb, pub);
2942
2943                         pub->addr  = ips->ips[i].addr;
2944                         pub->mask  = 0;
2945                         pub->len   = 0;
2946
2947                         timeout = TAKEOVER_TIMEOUT();
2948
2949                         data.dsize = offsetof(struct ctdb_addr_info_old,
2950                                               iface) + pub->len;
2951                         data.dptr = (uint8_t *)pub;
2952
2953                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2954                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2955                                                   0, data, async_data,
2956                                                   &timeout, NULL);
2957                         if (state == NULL) {
2958                                 DEBUG(DEBUG_ERR,
2959                                       (__location__
2960                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2961                                 goto failed;
2962                         }
2963
2964                         ctdb_client_async_add(async_data, state);
2965                 }
2966         }
2967
2968         /* Compare IPs between node and file for IPs to be added */
2969         first_add = true;
2970         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2971                 for (i = 0; i < ips->num; i++) {
2972                         if (ctdb_same_ip(&vnn->public_address,
2973                                          &ips->ips[i].addr)) {
2974                                 /* IP already on node */
2975                                 break;
2976                         }
2977                 }
2978                 if (i == ips->num) {
2979                         /* Add IP ips->ips[i] */
2980                         struct ctdb_addr_info_old *pub;
2981                         const char *ifaces = NULL;
2982                         uint32_t len;
2983                         int iface = 0;
2984
2985                         DEBUG(DEBUG_NOTICE,
2986                               ("New IP %s configured, adding it\n",
2987                                ctdb_addr_to_str(&vnn->public_address)));
2988                         if (first_add) {
2989                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2990
2991                                 data.dsize = sizeof(pnn);
2992                                 data.dptr  = (uint8_t *)&pnn;
2993
2994                                 ret = ctdb_client_send_message(
2995                                         ctdb,
2996                                         CTDB_BROADCAST_CONNECTED,
2997                                         CTDB_SRVID_REBALANCE_NODE,
2998                                         data);
2999                                 if (ret != 0) {
3000                                         DEBUG(DEBUG_WARNING,
3001                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
3002                                 }
3003
3004                                 first_add = false;
3005                         }
3006
3007                         ifaces = vnn->ifaces[0];
3008                         iface = 1;
3009                         while (vnn->ifaces[iface] != NULL) {
3010                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3011                                                          vnn->ifaces[iface]);
3012                                 iface++;
3013                         }
3014
3015                         len   = strlen(ifaces) + 1;
3016                         pub = talloc_zero_size(mem_ctx,
3017                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3018                         CTDB_NO_MEMORY(ctdb, pub);
3019
3020                         pub->addr  = vnn->public_address;
3021                         pub->mask  = vnn->public_netmask_bits;
3022                         pub->len   = len;
3023                         memcpy(&pub->iface[0], ifaces, pub->len);
3024
3025                         timeout = TAKEOVER_TIMEOUT();
3026
3027                         data.dsize = offsetof(struct ctdb_addr_info_old,
3028                                               iface) + pub->len;
3029                         data.dptr = (uint8_t *)pub;
3030
3031                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3032                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3033                                                   0, data, async_data,
3034                                                   &timeout, NULL);
3035                         if (state == NULL) {
3036                                 DEBUG(DEBUG_ERR,
3037                                       (__location__
3038                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3039                                 goto failed;
3040                         }
3041
3042                         ctdb_client_async_add(async_data, state);
3043                 }
3044         }
3045
3046         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3047                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3048                 goto failed;
3049         }
3050
3051         talloc_free(mem_ctx);
3052         return 0;
3053
3054 failed:
3055         talloc_free(mem_ctx);
3056         return -1;
3057 }
3058
3059 /* This control is sent to force the node to re-read the public addresses file
3060    and drop any addresses we should nnot longer host, and add new addresses
3061    that we are now able to host
3062 */
3063 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3064 {
3065         struct ctdb_reloadips_handle *h;
3066         pid_t parent = getpid();
3067
3068         if (ctdb->reload_ips != NULL) {
3069                 talloc_free(ctdb->reload_ips);
3070                 ctdb->reload_ips = NULL;
3071         }
3072
3073         h = talloc(ctdb, struct ctdb_reloadips_handle);
3074         CTDB_NO_MEMORY(ctdb, h);
3075         h->ctdb     = ctdb;
3076         h->c        = NULL;
3077         h->status   = -1;
3078         
3079         if (pipe(h->fd) == -1) {
3080                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3081                 talloc_free(h);
3082                 return -1;
3083         }
3084
3085         h->child = ctdb_fork(ctdb);
3086         if (h->child == (pid_t)-1) {
3087                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3088                 close(h->fd[0]);
3089                 close(h->fd[1]);
3090                 talloc_free(h);
3091                 return -1;
3092         }
3093
3094         /* child process */
3095         if (h->child == 0) {
3096                 signed char res = 0;
3097
3098                 close(h->fd[0]);
3099                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3100
3101                 prctl_set_comment("ctdb_reloadips");
3102                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3103                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3104                         res = -1;
3105                 } else {
3106                         res = ctdb_reloadips_child(ctdb);
3107                         if (res != 0) {
3108                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3109                         }
3110                 }
3111
3112                 sys_write(h->fd[1], &res, 1);
3113                 ctdb_wait_for_process_to_exit(parent);
3114                 _exit(0);
3115         }
3116
3117         h->c             = talloc_steal(h, c);
3118
3119         close(h->fd[1]);
3120         set_close_on_exec(h->fd[0]);
3121
3122         talloc_set_destructor(h, ctdb_reloadips_destructor);
3123
3124
3125         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3126                                ctdb_reloadips_child_handler, (void *)h);
3127         tevent_fd_set_auto_close(h->fde);
3128
3129         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3130                          ctdb_reloadips_timeout_event, h);
3131
3132         /* we reply later */
3133         *async_reply = true;
3134         return 0;
3135 }