ctdb-daemon: Fix CID 1125574 Operands don't affect result
[metze/samba/wip.git] / ctdb / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6    Copyright (C) Martin Schwenke  2011
7
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12    
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17    
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21 #include "replace.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/time.h"
25 #include "system/wait.h"
26
27 #include <talloc.h>
28 #include <tevent.h>
29
30 #include "lib/util/dlinklist.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/samba_util.h"
33 #include "lib/util/util_process.h"
34
35 #include "ctdb_private.h"
36 #include "ctdb_client.h"
37
38 #include "common/rb_tree.h"
39 #include "common/reqid.h"
40 #include "common/system.h"
41 #include "common/common.h"
42 #include "common/logging.h"
43
44 #include "server/ipalloc.h"
45
/*
 * Timeout value built from the takeover_timeout tunable via
 * timeval_current_ofs().  The expansion references a variable named
 * "ctdb", so it can only be used where such a variable is in scope.
 * NOTE(review): no user is visible in this part of the file.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/*
 * Gratuitous ARP announcements: an announcement series stops once
 * CTDB_ARP_REPEAT rounds have been sent; CTDB_ARP_INTERVAL is the first
 * argument handed to timeval_current_ofs() when scheduling the next
 * round (presumably seconds - confirm against timeval_current_ofs()).
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
50
/* a local network interface that public addresses can be assigned to */
struct ctdb_interface {
        /* doubly linked list pointers */
        struct ctdb_interface *prev;
        struct ctdb_interface *next;

        /* name of the interface */
        const char *name;

        /* link state; only interfaces with link_up set are picked for
           new address assignments */
        bool link_up;

        /* number of public addresses currently assigned to this
           interface */
        uint32_t references;
};
57
58 /* state associated with a public ip address */
59 struct ctdb_vnn {
60         struct ctdb_vnn *prev, *next;
61
62         struct ctdb_interface *iface;
63         const char **ifaces;
64         ctdb_sock_addr public_address;
65         uint8_t public_netmask_bits;
66
67         /* the node number that is serving this public address, if any.
68            If no node serves this ip it is set to -1 */
69         int32_t pnn;
70
71         /* List of clients to tickle for this public address */
72         struct ctdb_tcp_array *tcp_array;
73
74         /* whether we need to update the other nodes with changes to our list
75            of connected clients */
76         bool tcp_update_needed;
77
78         /* a context to hang sending gratious arp events off */
79         TALLOC_CTX *takeover_ctx;
80
81         /* Set to true any time an update to this VNN is in flight.
82            This helps to avoid races. */
83         bool update_in_flight;
84
85         /* If CTDB_CONTROL_DEL_PUBLIC_IP is received for this IP
86          * address then this flag is set.  It will be deleted in the
87          * release IP callback. */
88         bool delete_pending;
89 };
90
91 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
92 {
93         if (vnn->iface) {
94                 return vnn->iface->name;
95         }
96
97         return "__none__";
98 }
99
100 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
101 {
102         struct ctdb_interface *i;
103
104         if (strlen(iface) > CTDB_IFACE_SIZE) {
105                 DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
106                 return -1;
107         }
108
109         /* Verify that we don't have an entry for this ip yet */
110         for (i=ctdb->ifaces;i;i=i->next) {
111                 if (strcmp(i->name, iface) == 0) {
112                         return 0;
113                 }
114         }
115
116         /* create a new structure for this interface */
117         i = talloc_zero(ctdb, struct ctdb_interface);
118         CTDB_NO_MEMORY_FATAL(ctdb, i);
119         i->name = talloc_strdup(i, iface);
120         CTDB_NO_MEMORY(ctdb, i->name);
121
122         i->link_up = true;
123
124         DLIST_ADD(ctdb->ifaces, i);
125
126         return 0;
127 }
128
129 static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
130                                         const char *name)
131 {
132         int n;
133
134         for (n = 0; vnn->ifaces[n] != NULL; n++) {
135                 if (strcmp(name, vnn->ifaces[n]) == 0) {
136                         return true;
137                 }
138         }
139
140         return false;
141 }
142
143 /* If any interfaces now have no possible IPs then delete them.  This
144  * implementation is naive (i.e. simple) rather than clever
145  * (i.e. complex).  Given that this is run on delip and that operation
146  * is rare, this doesn't need to be efficient - it needs to be
147  * foolproof.  One alternative is reference counting, where the logic
148  * is distributed and can, therefore, be broken in multiple places.
149  * Another alternative is to build a red-black tree of interfaces that
150  * can have addresses (by walking ctdb->vnn once) and then walking
151  * ctdb->ifaces once and deleting those not in the tree.  Let's go to
152  * one of those if the naive implementation causes problems...  :-)
153  */
154 static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
155                                         struct ctdb_vnn *vnn)
156 {
157         struct ctdb_interface *i, *next;
158
159         /* For each interface, check if there's an IP using it. */
160         for (i = ctdb->ifaces; i != NULL; i = next) {
161                 struct ctdb_vnn *tv;
162                 bool found;
163                 next = i->next;
164
165                 /* Only consider interfaces named in the given VNN. */
166                 if (!vnn_has_interface_with_name(vnn, i->name)) {
167                         continue;
168                 }
169
170                 /* Search for a vnn with this interface. */
171                 found = false;
172                 for (tv=ctdb->vnn; tv; tv=tv->next) {
173                         if (vnn_has_interface_with_name(tv, i->name)) {
174                                 found = true;
175                                 break;
176                         }
177                 }
178
179                 if (!found) {
180                         /* None of the VNNs are using this interface. */
181                         DLIST_REMOVE(ctdb->ifaces, i);
182                         talloc_free(i);
183                 }
184         }
185 }
186
187
188 static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
189                                               const char *iface)
190 {
191         struct ctdb_interface *i;
192
193         for (i=ctdb->ifaces;i;i=i->next) {
194                 if (strcmp(i->name, iface) == 0) {
195                         return i;
196                 }
197         }
198
199         return NULL;
200 }
201
202 static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
203                                                   struct ctdb_vnn *vnn)
204 {
205         int i;
206         struct ctdb_interface *cur = NULL;
207         struct ctdb_interface *best = NULL;
208
209         for (i=0; vnn->ifaces[i]; i++) {
210
211                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
212                 if (cur == NULL) {
213                         continue;
214                 }
215
216                 if (!cur->link_up) {
217                         continue;
218                 }
219
220                 if (best == NULL) {
221                         best = cur;
222                         continue;
223                 }
224
225                 if (cur->references < best->references) {
226                         best = cur;
227                         continue;
228                 }
229         }
230
231         return best;
232 }
233
234 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
235                                      struct ctdb_vnn *vnn)
236 {
237         struct ctdb_interface *best = NULL;
238
239         if (vnn->iface) {
240                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
241                                    "still assigned to iface '%s'\n",
242                                    ctdb_addr_to_str(&vnn->public_address),
243                                    ctdb_vnn_iface_string(vnn)));
244                 return 0;
245         }
246
247         best = ctdb_vnn_best_iface(ctdb, vnn);
248         if (best == NULL) {
249                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
250                                   "cannot assign to iface any iface\n",
251                                   ctdb_addr_to_str(&vnn->public_address)));
252                 return -1;
253         }
254
255         vnn->iface = best;
256         best->references++;
257         vnn->pnn = ctdb->pnn;
258
259         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
260                            "now assigned to iface '%s' refs[%d]\n",
261                            ctdb_addr_to_str(&vnn->public_address),
262                            ctdb_vnn_iface_string(vnn),
263                            best->references));
264         return 0;
265 }
266
267 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
268                                     struct ctdb_vnn *vnn)
269 {
270         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
271                            "now unassigned (old iface '%s' refs[%d])\n",
272                            ctdb_addr_to_str(&vnn->public_address),
273                            ctdb_vnn_iface_string(vnn),
274                            vnn->iface?vnn->iface->references:0));
275         if (vnn->iface) {
276                 vnn->iface->references--;
277         }
278         vnn->iface = NULL;
279         if (vnn->pnn == ctdb->pnn) {
280                 vnn->pnn = -1;
281         }
282 }
283
284 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
285                                struct ctdb_vnn *vnn)
286 {
287         int i;
288
289         /* Nodes that are not RUNNING can not host IPs */
290         if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
291                 return false;
292         }
293
294         if (vnn->delete_pending) {
295                 return false;
296         }
297
298         if (vnn->iface && vnn->iface->link_up) {
299                 return true;
300         }
301
302         for (i=0; vnn->ifaces[i]; i++) {
303                 struct ctdb_interface *cur;
304
305                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
306                 if (cur == NULL) {
307                         continue;
308                 }
309
310                 if (cur->link_up) {
311                         return true;
312                 }
313         }
314
315         return false;
316 }
317
/*
  state for one series of gratuitous arp (and tcp tickle) announcements
  of a public address
 */
struct ctdb_takeover_arp {
        struct ctdb_context *ctdb;
        /* number of announcement rounds sent so far */
        uint32_t count;
        /* the public address being announced */
        ctdb_sock_addr addr;
        /* connections to send tickle acks for in every round, may be NULL */
        struct ctdb_tcp_array *tcparray;
        /* the public address state this announcement belongs to */
        struct ctdb_vnn *vnn;
};
325
326
327 /*
328   lists of tcp endpoints
329  */
330 struct ctdb_tcp_list {
331         struct ctdb_tcp_list *prev, *next;
332         struct ctdb_connection connection;
333 };
334
335 /*
336   list of clients to kill on IP release
337  */
338 struct ctdb_client_ip {
339         struct ctdb_client_ip *prev, *next;
340         struct ctdb_context *ctdb;
341         ctdb_sock_addr addr;
342         uint32_t client_id;
343 };
344
345
346 /*
347   send a gratuitous arp
348  */
349 static void ctdb_control_send_arp(struct tevent_context *ev,
350                                   struct tevent_timer *te,
351                                   struct timeval t, void *private_data)
352 {
353         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
354                                                         struct ctdb_takeover_arp);
355         int i, ret;
356         struct ctdb_tcp_array *tcparray;
357         const char *iface = ctdb_vnn_iface_string(arp->vnn);
358
359         ret = ctdb_sys_send_arp(&arp->addr, iface);
360         if (ret != 0) {
361                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
362                                   iface, strerror(errno)));
363         }
364
365         tcparray = arp->tcparray;
366         if (tcparray) {
367                 for (i=0;i<tcparray->num;i++) {
368                         struct ctdb_connection *tcon;
369
370                         tcon = &tcparray->connections[i];
371                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
372                                 (unsigned)ntohs(tcon->dst.ip.sin_port),
373                                 ctdb_addr_to_str(&tcon->src),
374                                 (unsigned)ntohs(tcon->src.ip.sin_port)));
375                         ret = ctdb_sys_send_tcp(
376                                 &tcon->src,
377                                 &tcon->dst,
378                                 0, 0, 0);
379                         if (ret != 0) {
380                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
381                                         ctdb_addr_to_str(&tcon->src)));
382                         }
383                 }
384         }
385
386         arp->count++;
387
388         if (arp->count == CTDB_ARP_REPEAT) {
389                 talloc_free(arp);
390                 return;
391         }
392
393         tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
394                          timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
395                          ctdb_control_send_arp, arp);
396 }
397
398 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
399                                        struct ctdb_vnn *vnn)
400 {
401         struct ctdb_takeover_arp *arp;
402         struct ctdb_tcp_array *tcparray;
403
404         if (!vnn->takeover_ctx) {
405                 vnn->takeover_ctx = talloc_new(vnn);
406                 if (!vnn->takeover_ctx) {
407                         return -1;
408                 }
409         }
410
411         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
412         if (!arp) {
413                 return -1;
414         }
415
416         arp->ctdb = ctdb;
417         arp->addr = vnn->public_address;
418         arp->vnn  = vnn;
419
420         tcparray = vnn->tcp_array;
421         if (tcparray) {
422                 /* add all of the known tcp connections for this IP to the
423                    list of tcp connections to send tickle acks for */
424                 arp->tcparray = talloc_steal(arp, tcparray);
425
426                 vnn->tcp_array = NULL;
427                 vnn->tcp_update_needed = true;
428         }
429
430         tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
431                          timeval_zero(), ctdb_control_send_arp, arp);
432
433         return 0;
434 }
435
/*
  state handed to release_ip_callback()
 */
struct takeover_callback_state {
        /* the control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* the public address the control is about */
        ctdb_sock_addr *addr;
        /* state of that public address; release_ip_callback() sets this
           to NULL after deleting the address */
        struct ctdb_vnn *vnn;
};
441
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* the control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* the public address being taken over */
        struct ctdb_vnn *vnn;
};
446
447 /*
448   called when takeip event finishes
449  */
450 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
451                                     void *private_data)
452 {
453         struct ctdb_do_takeip_state *state =
454                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
455         int32_t ret;
456         TDB_DATA data;
457
458         if (status != 0) {
459                 if (status == -ETIME) {
460                         ctdb_ban_self(ctdb);
461                 }
462                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
463                                  ctdb_addr_to_str(&state->vnn->public_address),
464                                  ctdb_vnn_iface_string(state->vnn)));
465                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
466
467                 talloc_free(state);
468                 return;
469         }
470
471         if (ctdb->do_checkpublicip) {
472
473         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
474         if (ret != 0) {
475                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
476                 talloc_free(state);
477                 return;
478         }
479
480         }
481
482         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
483         data.dsize = strlen((char *)data.dptr) + 1;
484         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
485
486         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
487
488
489         /* the control succeeded */
490         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
491         talloc_free(state);
492         return;
493 }
494
/*
  destructor installed on the takeip state by ctdb_do_takeip(): marks
  the public address as no longer having an update in flight.
  Always returns 0.
 */
static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
{
        state->vnn->update_in_flight = false;
        return 0;
}
500
501 /*
502   take over an ip address
503  */
504 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
505                               struct ctdb_req_control_old *c,
506                               struct ctdb_vnn *vnn)
507 {
508         int ret;
509         struct ctdb_do_takeip_state *state;
510
511         if (vnn->update_in_flight) {
512                 DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
513                                     "update for this IP already in flight\n",
514                                     ctdb_addr_to_str(&vnn->public_address),
515                                     vnn->public_netmask_bits));
516                 return -1;
517         }
518
519         ret = ctdb_vnn_assign_iface(ctdb, vnn);
520         if (ret != 0) {
521                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
522                                  "assign a usable interface\n",
523                                  ctdb_addr_to_str(&vnn->public_address),
524                                  vnn->public_netmask_bits));
525                 return -1;
526         }
527
528         state = talloc(vnn, struct ctdb_do_takeip_state);
529         CTDB_NO_MEMORY(ctdb, state);
530
531         state->c = talloc_steal(ctdb, c);
532         state->vnn   = vnn;
533
534         vnn->update_in_flight = true;
535         talloc_set_destructor(state, ctdb_takeip_destructor);
536
537         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
538                             ctdb_addr_to_str(&vnn->public_address),
539                             vnn->public_netmask_bits,
540                             ctdb_vnn_iface_string(vnn)));
541
542         ret = ctdb_event_script_callback(ctdb,
543                                          state,
544                                          ctdb_do_takeip_callback,
545                                          state,
546                                          CTDB_EVENT_TAKE_IP,
547                                          "%s %s %u",
548                                          ctdb_vnn_iface_string(vnn),
549                                          ctdb_addr_to_str(&vnn->public_address),
550                                          vnn->public_netmask_bits);
551
552         if (ret != 0) {
553                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
554                         ctdb_addr_to_str(&vnn->public_address),
555                         ctdb_vnn_iface_string(vnn)));
556                 talloc_free(state);
557                 return -1;
558         }
559
560         return 0;
561 }
562
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* the control request to reply to once the event has finished */
        struct ctdb_req_control_old *c;
        /* the interface the address was on before the move; restored
           by the callback if the event fails */
        struct ctdb_interface *old;
        /* the public address being moved */
        struct ctdb_vnn *vnn;
};
568
569 /*
570   called when updateip event finishes
571  */
572 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
573                                       void *private_data)
574 {
575         struct ctdb_do_updateip_state *state =
576                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
577         int32_t ret;
578
579         if (status != 0) {
580                 if (status == -ETIME) {
581                         ctdb_ban_self(ctdb);
582                 }
583                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
584                         ctdb_addr_to_str(&state->vnn->public_address),
585                         state->old->name,
586                         ctdb_vnn_iface_string(state->vnn)));
587
588                 /*
589                  * All we can do is reset the old interface
590                  * and let the next run fix it
591                  */
592                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
593                 state->vnn->iface = state->old;
594                 state->vnn->iface->references++;
595
596                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
597                 talloc_free(state);
598                 return;
599         }
600
601         if (ctdb->do_checkpublicip) {
602
603         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
604         if (ret != 0) {
605                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
606                 talloc_free(state);
607                 return;
608         }
609
610         }
611
612         /* the control succeeded */
613         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
614         talloc_free(state);
615         return;
616 }
617
/*
  destructor installed on the updateip state by ctdb_do_updateip():
  marks the public address as no longer having an update in flight.
  Always returns 0.
 */
static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
{
        state->vnn->update_in_flight = false;
        return 0;
}
623
624 /*
625   update (move) an ip address
626  */
627 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
628                                 struct ctdb_req_control_old *c,
629                                 struct ctdb_vnn *vnn)
630 {
631         int ret;
632         struct ctdb_do_updateip_state *state;
633         struct ctdb_interface *old = vnn->iface;
634         const char *new_name;
635
636         if (vnn->update_in_flight) {
637                 DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
638                                     "update for this IP already in flight\n",
639                                     ctdb_addr_to_str(&vnn->public_address),
640                                     vnn->public_netmask_bits));
641                 return -1;
642         }
643
644         ctdb_vnn_unassign_iface(ctdb, vnn);
645         ret = ctdb_vnn_assign_iface(ctdb, vnn);
646         if (ret != 0) {
647                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
648                                  "assin a usable interface (old iface '%s')\n",
649                                  ctdb_addr_to_str(&vnn->public_address),
650                                  vnn->public_netmask_bits,
651                                  old->name));
652                 return -1;
653         }
654
655         new_name = ctdb_vnn_iface_string(vnn);
656         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
657                 /* A benign update from one interface onto itself.
658                  * no need to run the eventscripts in this case, just return
659                  * success.
660                  */
661                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
662                 return 0;
663         }
664
665         state = talloc(vnn, struct ctdb_do_updateip_state);
666         CTDB_NO_MEMORY(ctdb, state);
667
668         state->c = talloc_steal(ctdb, c);
669         state->old = old;
670         state->vnn = vnn;
671
672         vnn->update_in_flight = true;
673         talloc_set_destructor(state, ctdb_updateip_destructor);
674
675         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
676                             "interface %s to %s\n",
677                             ctdb_addr_to_str(&vnn->public_address),
678                             vnn->public_netmask_bits,
679                             old->name,
680                             new_name));
681
682         ret = ctdb_event_script_callback(ctdb,
683                                          state,
684                                          ctdb_do_updateip_callback,
685                                          state,
686                                          CTDB_EVENT_UPDATE_IP,
687                                          "%s %s %s %u",
688                                          state->old->name,
689                                          new_name,
690                                          ctdb_addr_to_str(&vnn->public_address),
691                                          vnn->public_netmask_bits);
692         if (ret != 0) {
693                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
694                                  ctdb_addr_to_str(&vnn->public_address),
695                                  old->name, new_name));
696                 talloc_free(state);
697                 return -1;
698         }
699
700         return 0;
701 }
702
703 /*
704   Find the vnn of the node that has a public ip address
705   returns -1 if the address is not known as a public address
706  */
707 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
708 {
709         struct ctdb_vnn *vnn;
710
711         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
712                 if (ctdb_same_ip(&vnn->public_address, addr)) {
713                         return vnn;
714                 }
715         }
716
717         return NULL;
718 }
719
720 /*
721   take over an ip address
722  */
723 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
724                                  struct ctdb_req_control_old *c,
725                                  TDB_DATA indata,
726                                  bool *async_reply)
727 {
728         int ret;
729         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
730         struct ctdb_vnn *vnn;
731         bool have_ip = false;
732         bool do_updateip = false;
733         bool do_takeip = false;
734         struct ctdb_interface *best_iface = NULL;
735
736         if (pip->pnn != ctdb->pnn) {
737                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
738                                  "with pnn %d, but we're node %d\n",
739                                  ctdb_addr_to_str(&pip->addr),
740                                  pip->pnn, ctdb->pnn));
741                 return -1;
742         }
743
744         /* update out vnn list */
745         vnn = find_public_ip_vnn(ctdb, &pip->addr);
746         if (vnn == NULL) {
747                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
748                         ctdb_addr_to_str(&pip->addr)));
749                 return 0;
750         }
751
752         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
753                 have_ip = ctdb_sys_have_ip(&pip->addr);
754         }
755         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
756         if (best_iface == NULL) {
757                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
758                                  "a usable interface (old %s, have_ip %d)\n",
759                                  ctdb_addr_to_str(&vnn->public_address),
760                                  vnn->public_netmask_bits,
761                                  ctdb_vnn_iface_string(vnn),
762                                  have_ip));
763                 return -1;
764         }
765
766         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
767                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
768                 have_ip = false;
769         }
770
771
772         if (vnn->iface == NULL && have_ip) {
773                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
774                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
775                                  ctdb_addr_to_str(&vnn->public_address)));
776                 return 0;
777         }
778
779         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
780                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
781                                   "and we have it on iface[%s], but it was assigned to node %d"
782                                   "and we are node %d, banning ourself\n",
783                                  ctdb_addr_to_str(&vnn->public_address),
784                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
785                 ctdb_ban_self(ctdb);
786                 return -1;
787         }
788
789         if (vnn->pnn == -1 && have_ip) {
790                 vnn->pnn = ctdb->pnn;
791                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
792                                   "and we already have it on iface[%s], update local daemon\n",
793                                  ctdb_addr_to_str(&vnn->public_address),
794                                   ctdb_vnn_iface_string(vnn)));
795                 return 0;
796         }
797
798         if (vnn->iface) {
799                 if (vnn->iface != best_iface) {
800                         if (!vnn->iface->link_up) {
801                                 do_updateip = true;
802                         } else if (vnn->iface->references > (best_iface->references + 1)) {
803                                 /* only move when the rebalance gains something */
804                                         do_updateip = true;
805                         }
806                 }
807         }
808
809         if (!have_ip) {
810                 if (do_updateip) {
811                         ctdb_vnn_unassign_iface(ctdb, vnn);
812                         do_updateip = false;
813                 }
814                 do_takeip = true;
815         }
816
817         if (do_takeip) {
818                 ret = ctdb_do_takeip(ctdb, c, vnn);
819                 if (ret != 0) {
820                         return -1;
821                 }
822         } else if (do_updateip) {
823                 ret = ctdb_do_updateip(ctdb, c, vnn);
824                 if (ret != 0) {
825                         return -1;
826                 }
827         } else {
828                 /*
829                  * The interface is up and the kernel known the ip
830                  * => do nothing
831                  */
832                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
833                         ctdb_addr_to_str(&pip->addr),
834                         vnn->public_netmask_bits,
835                         ctdb_vnn_iface_string(vnn)));
836                 return 0;
837         }
838
839         /* tell ctdb_control.c that we will be replying asynchronously */
840         *async_reply = true;
841
842         return 0;
843 }
844
/*
  delete a public address from this node: take it off ctdb->vnn, drop
  its interface assignment, delete any of its interfaces that no
  remaining address lists, and free it
 */
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
{
        /* unlink first, so the orphan scan below does not see this vnn */
        DLIST_REMOVE(ctdb->vnn, vnn);
        ctdb_vnn_unassign_iface(ctdb, vnn);
        ctdb_remove_orphaned_ifaces(ctdb, vnn);
        talloc_free(vnn);
}
852
853 /*
854   called when releaseip event finishes
855  */
856 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
857                                 void *private_data)
858 {
859         struct takeover_callback_state *state = 
860                 talloc_get_type(private_data, struct takeover_callback_state);
861         TDB_DATA data;
862
863         if (status == -ETIME) {
864                 ctdb_ban_self(ctdb);
865         }
866
867         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
868                 if  (ctdb_sys_have_ip(state->addr)) {
869                         DEBUG(DEBUG_ERR,
870                               ("IP %s still hosted during release IP callback, failing\n",
871                                ctdb_addr_to_str(state->addr)));
872                         ctdb_request_control_reply(ctdb, state->c,
873                                                    NULL, -1, NULL);
874                         talloc_free(state);
875                         return;
876                 }
877         }
878
879         /* send a message to all clients of this node telling them
880            that the cluster has been reconfigured and they should
881            release any sockets on this IP */
882         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
883         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
884         data.dsize = strlen((char *)data.dptr)+1;
885
886         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
887
888         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
889
890         ctdb_vnn_unassign_iface(ctdb, state->vnn);
891
892         /* Process the IP if it has been marked for deletion */
893         if (state->vnn->delete_pending) {
894                 do_delete_ip(ctdb, state->vnn);
895                 state->vnn = NULL;
896         }
897
898         /* the control succeeded */
899         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
900         talloc_free(state);
901 }
902
903 static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
904 {
905         if (state->vnn != NULL) {
906                 state->vnn->update_in_flight = false;
907         }
908         return 0;
909 }
910
911 /*
912   release an ip address
913  */
914 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
915                                 struct ctdb_req_control_old *c,
916                                 TDB_DATA indata, 
917                                 bool *async_reply)
918 {
919         int ret;
920         struct takeover_callback_state *state;
921         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
922         struct ctdb_vnn *vnn;
923         char *iface;
924
925         /* update our vnn list */
926         vnn = find_public_ip_vnn(ctdb, &pip->addr);
927         if (vnn == NULL) {
928                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
929                         ctdb_addr_to_str(&pip->addr)));
930                 return 0;
931         }
932         vnn->pnn = pip->pnn;
933
934         /* stop any previous arps */
935         talloc_free(vnn->takeover_ctx);
936         vnn->takeover_ctx = NULL;
937
938         /* Some ctdb tool commands (e.g. moveip) send
939          * lazy multicast to drop an IP from any node that isn't the
940          * intended new node.  The following causes makes ctdbd ignore
941          * a release for any address it doesn't host.
942          */
943         if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
944                 if (!ctdb_sys_have_ip(&pip->addr)) {
945                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
946                                 ctdb_addr_to_str(&pip->addr),
947                                 vnn->public_netmask_bits,
948                                 ctdb_vnn_iface_string(vnn)));
949                         ctdb_vnn_unassign_iface(ctdb, vnn);
950                         return 0;
951                 }
952         } else {
953                 if (vnn->iface == NULL) {
954                         DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
955                                            ctdb_addr_to_str(&pip->addr),
956                                            vnn->public_netmask_bits));
957                         return 0;
958                 }
959         }
960
961         /* There is a potential race between take_ip and us because we
962          * update the VNN via a callback that run when the
963          * eventscripts have been run.  Avoid the race by allowing one
964          * update to be in flight at a time.
965          */
966         if (vnn->update_in_flight) {
967                 DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
968                                     "update for this IP already in flight\n",
969                                     ctdb_addr_to_str(&vnn->public_address),
970                                     vnn->public_netmask_bits));
971                 return -1;
972         }
973
974         iface = strdup(ctdb_vnn_iface_string(vnn));
975
976         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
977                 ctdb_addr_to_str(&pip->addr),
978                 vnn->public_netmask_bits,
979                 iface,
980                 pip->pnn));
981
982         state = talloc(ctdb, struct takeover_callback_state);
983         if (state == NULL) {
984                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
985                                __FILE__, __LINE__);
986                 free(iface);
987                 return -1;
988         }
989
990         state->c = talloc_steal(state, c);
991         state->addr = talloc(state, ctdb_sock_addr);       
992         if (state->addr == NULL) {
993                 ctdb_set_error(ctdb, "Out of memory at %s:%d",
994                                __FILE__, __LINE__);
995                 free(iface);
996                 talloc_free(state);
997                 return -1;
998         }
999         *state->addr = pip->addr;
1000         state->vnn   = vnn;
1001
1002         vnn->update_in_flight = true;
1003         talloc_set_destructor(state, ctdb_releaseip_destructor);
1004
1005         ret = ctdb_event_script_callback(ctdb, 
1006                                          state, release_ip_callback, state,
1007                                          CTDB_EVENT_RELEASE_IP,
1008                                          "%s %s %u",
1009                                          iface,
1010                                          ctdb_addr_to_str(&pip->addr),
1011                                          vnn->public_netmask_bits);
1012         free(iface);
1013         if (ret != 0) {
1014                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
1015                         ctdb_addr_to_str(&pip->addr),
1016                         ctdb_vnn_iface_string(vnn)));
1017                 talloc_free(state);
1018                 return -1;
1019         }
1020
1021         /* tell the control that we will be reply asynchronously */
1022         *async_reply = true;
1023         return 0;
1024 }
1025
1026 static int ctdb_add_public_address(struct ctdb_context *ctdb,
1027                                    ctdb_sock_addr *addr,
1028                                    unsigned mask, const char *ifaces,
1029                                    bool check_address)
1030 {
1031         struct ctdb_vnn      *vnn;
1032         uint32_t num = 0;
1033         char *tmp;
1034         const char *iface;
1035         int i;
1036         int ret;
1037
1038         tmp = strdup(ifaces);
1039         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1040                 if (!ctdb_sys_check_iface_exists(iface)) {
1041                         DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
1042                         free(tmp);
1043                         return -1;
1044                 }
1045         }
1046         free(tmp);
1047
1048         /* Verify that we don't have an entry for this ip yet */
1049         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1050                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
1051                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
1052                                 ctdb_addr_to_str(addr)));
1053                         return -1;
1054                 }               
1055         }
1056
1057         /* create a new vnn structure for this ip address */
1058         vnn = talloc_zero(ctdb, struct ctdb_vnn);
1059         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
1060         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
1061         tmp = talloc_strdup(vnn, ifaces);
1062         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
1063         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
1064                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
1065                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
1066                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
1067                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
1068                 num++;
1069         }
1070         talloc_free(tmp);
1071         vnn->ifaces[num] = NULL;
1072         vnn->public_address      = *addr;
1073         vnn->public_netmask_bits = mask;
1074         vnn->pnn                 = -1;
1075         if (check_address) {
1076                 if (ctdb_sys_have_ip(addr)) {
1077                         DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
1078                         vnn->pnn = ctdb->pnn;
1079                 }
1080         }
1081
1082         for (i=0; vnn->ifaces[i]; i++) {
1083                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
1084                 if (ret != 0) {
1085                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1086                                            "for public_address[%s]\n",
1087                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
1088                         talloc_free(vnn);
1089                         return -1;
1090                 }
1091         }
1092
1093         DLIST_ADD(ctdb->vnn, vnn);
1094
1095         return 0;
1096 }
1097
1098 /*
1099   setup the public address lists from a file
1100 */
1101 int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
1102 {
1103         char **lines;
1104         int nlines;
1105         int i;
1106
1107         lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
1108         if (lines == NULL) {
1109                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
1110                 return -1;
1111         }
1112         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
1113                 nlines--;
1114         }
1115
1116         for (i=0;i<nlines;i++) {
1117                 unsigned mask;
1118                 ctdb_sock_addr addr;
1119                 const char *addrstr;
1120                 const char *ifaces;
1121                 char *tok, *line;
1122
1123                 line = lines[i];
1124                 while ((*line == ' ') || (*line == '\t')) {
1125                         line++;
1126                 }
1127                 if (*line == '#') {
1128                         continue;
1129                 }
1130                 if (strcmp(line, "") == 0) {
1131                         continue;
1132                 }
1133                 tok = strtok(line, " \t");
1134                 addrstr = tok;
1135                 tok = strtok(NULL, " \t");
1136                 if (tok == NULL) {
1137                         if (NULL == ctdb->default_public_interface) {
1138                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
1139                                          i+1));
1140                                 talloc_free(lines);
1141                                 return -1;
1142                         }
1143                         ifaces = ctdb->default_public_interface;
1144                 } else {
1145                         ifaces = tok;
1146                 }
1147
1148                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
1149                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
1150                         talloc_free(lines);
1151                         return -1;
1152                 }
1153                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
1154                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1155                         talloc_free(lines);
1156                         return -1;
1157                 }
1158         }
1159
1160
1161         talloc_free(lines);
1162         return 0;
1163 }
1164
1165 static struct ctdb_public_ip_list *
1166 ctdb_fetch_remote_public_ips(struct ctdb_context *ctdb,
1167                              TALLOC_CTX *mem_ctx,
1168                              struct ctdb_node_map_old *nodemap,
1169                              uint32_t public_ip_flags)
1170 {
1171         int j, ret;
1172         struct ctdb_public_ip_list_old *ip_list;
1173         struct ctdb_public_ip_list *public_ips;
1174
1175         public_ips = talloc_zero_array(mem_ctx,
1176                                        struct ctdb_public_ip_list,
1177                                        nodemap->num);
1178         if (public_ips == NULL) {
1179                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1180                 return NULL;
1181         }
1182
1183         for (j = 0; j < nodemap->num; j++) {
1184                 if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
1185                         continue;
1186                 }
1187
1188                 /* Retrieve the list of public IPs from the
1189                  * node. Flags says whether it is known or
1190                  * available. */
1191                 ret = ctdb_ctrl_get_public_ips_flags(
1192                         ctdb, TAKEOVER_TIMEOUT(), j, public_ips,
1193                         public_ip_flags, &ip_list);
1194                 if (ret != 0) {
1195                         DEBUG(DEBUG_ERR,
1196                               ("Failed to read public IPs from node: %u\n", j));
1197                         talloc_free(public_ips);
1198                         return NULL;
1199                 }
1200                 public_ips[j].num = ip_list->num;
1201                 if (ip_list->num == 0) {
1202                         talloc_free(ip_list);
1203                         continue;
1204                 }
1205                 public_ips[j].ip = talloc_zero_array(public_ips,
1206                                                      struct ctdb_public_ip,
1207                                                      ip_list->num);
1208                 if (public_ips[j].ip == NULL) {
1209                         DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1210                         talloc_free(public_ips);
1211                         return NULL;
1212                 }
1213                 memcpy(public_ips[j].ip, &ip_list->ips[0],
1214                        sizeof(struct ctdb_public_ip) * ip_list->num);
1215                 talloc_free(ip_list);
1216         }
1217
1218         return public_ips;
1219 }
1220
/*
 * Context shared by get_tunable_callback() and
 * get_tunable_fail_callback() while one tunable is fetched from a set
 * of nodes.
 */
struct get_tunable_callback_data {
        /* Name of the tunable being fetched; used in log messages */
        const char *tunable;
        /* Result array, written at index PNN by the success callback */
        uint32_t *out;
        /* Set by either callback when the whole fetch must be abandoned */
        bool fatal;
};
1226
1227 static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
1228                                  int32_t res, TDB_DATA outdata,
1229                                  void *callback)
1230 {
1231         struct get_tunable_callback_data *cd =
1232                 (struct get_tunable_callback_data *)callback;
1233         int size;
1234
1235         if (res != 0) {
1236                 /* Already handled in fail callback */
1237                 return;
1238         }
1239
1240         if (outdata.dsize != sizeof(uint32_t)) {
1241                 DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
1242                                  cd->tunable, pnn, (int)sizeof(uint32_t),
1243                                  (int)outdata.dsize));
1244                 cd->fatal = true;
1245                 return;
1246         }
1247
1248         size = talloc_array_length(cd->out);
1249         if (pnn >= size) {
1250                 DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
1251                                  cd->tunable, pnn, size));
1252                 return;
1253         }
1254
1255                 
1256         cd->out[pnn] = *(uint32_t *)outdata.dptr;
1257 }
1258
1259 static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
1260                                        int32_t res, TDB_DATA outdata,
1261                                        void *callback)
1262 {
1263         struct get_tunable_callback_data *cd =
1264                 (struct get_tunable_callback_data *)callback;
1265
1266         switch (res) {
1267         case -ETIME:
1268                 DEBUG(DEBUG_ERR,
1269                       ("Timed out getting tunable \"%s\" from node %d\n",
1270                        cd->tunable, pnn));
1271                 cd->fatal = true;
1272                 break;
1273         case -EINVAL:
1274         case -1:
1275                 DEBUG(DEBUG_WARNING,
1276                       ("Tunable \"%s\" not implemented on node %d\n",
1277                        cd->tunable, pnn));
1278                 break;
1279         default:
1280                 DEBUG(DEBUG_ERR,
1281                       ("Unexpected error getting tunable \"%s\" from node %d\n",
1282                        cd->tunable, pnn));
1283                 cd->fatal = true;
1284         }
1285 }
1286
1287 static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
1288                                         TALLOC_CTX *tmp_ctx,
1289                                         struct ctdb_node_map_old *nodemap,
1290                                         const char *tunable,
1291                                         uint32_t default_value)
1292 {
1293         TDB_DATA data;
1294         struct ctdb_control_get_tunable *t;
1295         uint32_t *nodes;
1296         uint32_t *tvals;
1297         struct get_tunable_callback_data callback_data;
1298         int i;
1299
1300         tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
1301         CTDB_NO_MEMORY_NULL(ctdb, tvals);
1302         for (i=0; i<nodemap->num; i++) {
1303                 tvals[i] = default_value;
1304         }
1305                 
1306         callback_data.out = tvals;
1307         callback_data.tunable = tunable;
1308         callback_data.fatal = false;
1309
1310         data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
1311         data.dptr  = talloc_size(tmp_ctx, data.dsize);
1312         t = (struct ctdb_control_get_tunable *)data.dptr;
1313         t->length = strlen(tunable)+1;
1314         memcpy(t->name, tunable, t->length);
1315         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1316         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
1317                                       nodes, 0, TAKEOVER_TIMEOUT(),
1318                                       false, data,
1319                                       get_tunable_callback,
1320                                       get_tunable_fail_callback,
1321                                       &callback_data) != 0) {
1322                 if (callback_data.fatal) {
1323                         talloc_free(tvals);
1324                         tvals = NULL;
1325                 }
1326         }
1327         talloc_free(nodes);
1328         talloc_free(data.dptr);
1329
1330         return tvals;
1331 }
1332
1333 static struct ctdb_node_map *
1334 ctdb_node_map_old_to_new(TALLOC_CTX *mem_ctx,
1335                          const struct ctdb_node_map_old *old)
1336 {
1337         struct ctdb_node_map *new;
1338
1339         new = talloc(mem_ctx, struct ctdb_node_map);
1340         if (new == NULL) {
1341                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1342                 return NULL;
1343         }
1344         new->num = old->num;
1345         new->node = talloc_zero_array(new,
1346                                       struct ctdb_node_and_flags, new->num);
1347         memcpy(new->node, &old->nodes[0],
1348                sizeof(struct ctdb_node_and_flags) * new->num);
1349
1350         return new;
1351 }
1352
1353
/*
 * Feed node flags into the IP allocation state.
 *
 * Fetches the NoIPTakeover and NoIPHostOnAllDisabled tunables from
 * the nodes (default 0), converts nodemap to the newer node map
 * layout and passes all three to ipalloc_set_node_flags().
 *
 * Returns false if any of the three inputs cannot be obtained.
 * Everything is allocated on ipalloc_state, so on those early returns
 * the temporaries are left for whoever frees ipalloc_state.
 */
static bool set_ipflags(struct ctdb_context *ctdb,
                        struct ipalloc_state *ipalloc_state,
                        struct ctdb_node_map_old *nodemap)
{
        uint32_t *no_takeover;
        uint32_t *no_host_on_all_disabled;
        struct ctdb_node_map *converted;

        no_takeover = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
                                             "NoIPTakeover", 0);
        if (no_takeover == NULL) {
                return false;
        }

        no_host_on_all_disabled = get_tunable_from_nodes(
                ctdb, ipalloc_state, nodemap,
                "NoIPHostOnAllDisabled", 0);
        if (no_host_on_all_disabled == NULL) {
                return false;
        }

        converted = ctdb_node_map_old_to_new(ipalloc_state, nodemap);
        if (converted == NULL) {
                return false;
        }

        ipalloc_set_node_flags(ipalloc_state, converted,
                               no_takeover,
                               no_host_on_all_disabled);

        /* The inputs are freed here once they have been passed on */
        talloc_free(no_takeover);
        talloc_free(no_host_on_all_disabled);
        talloc_free(converted);

        return true;
}
1391
1392 static enum ipalloc_algorithm
1393 determine_algorithm(const struct ctdb_tunable_list *tunables)
1394 {
1395         if (1 == tunables->lcp2_public_ip_assignment) {
1396                 return IPALLOC_LCP2;
1397         } else if (1 == tunables->deterministic_public_ips) {
1398                 return IPALLOC_DETERMINISTIC;
1399         } else {
1400                 return IPALLOC_NONDETERMINISTIC;
1401         }
1402 }
1403
/*
 * Failure bookkeeping for a takeover run, filled in by
 * takeover_run_fail_callback().
 */
struct takeover_callback_data {
        /* Number of entries in fail_count */
        uint32_t num_nodes;
        /* Failures recorded so far, indexed by PNN */
        unsigned int *fail_count;
};
1408
1409 static struct takeover_callback_data *
1410 takeover_callback_data_init(TALLOC_CTX *mem_ctx,
1411                             uint32_t num_nodes)
1412 {
1413         static struct takeover_callback_data *takeover_data;
1414
1415         takeover_data = talloc_zero(mem_ctx, struct takeover_callback_data);
1416         if (takeover_data == NULL) {
1417                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1418                 return NULL;
1419         }
1420
1421         takeover_data->fail_count = talloc_zero_array(takeover_data,
1422                                                       unsigned int, num_nodes);
1423         if (takeover_data->fail_count == NULL) {
1424                 DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
1425                 talloc_free(takeover_data);
1426                 return NULL;
1427         }
1428
1429         takeover_data->num_nodes = num_nodes;
1430
1431         return takeover_data;
1432 }
1433
1434 static void takeover_run_fail_callback(struct ctdb_context *ctdb,
1435                                        uint32_t node_pnn, int32_t res,
1436                                        TDB_DATA outdata, void *callback_data)
1437 {
1438         struct takeover_callback_data *cd =
1439                 talloc_get_type_abort(callback_data,
1440                                       struct takeover_callback_data);
1441
1442         if (node_pnn >= cd->num_nodes) {
1443                 DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
1444                 return;
1445         }
1446
1447         if (cd->fail_count[node_pnn] == 0) {
1448                 DEBUG(DEBUG_ERR,
1449                       ("Node %u failed the takeover run\n", node_pnn));
1450         }
1451
1452         cd->fail_count[node_pnn]++;
1453 }
1454
1455 static void takeover_run_process_failures(struct ctdb_context *ctdb,
1456                                           struct takeover_callback_data *tcd)
1457 {
1458         unsigned int max_fails = 0;
1459         uint32_t max_pnn = -1;
1460         uint32_t i;
1461
1462         for (i = 0; i < tcd->num_nodes; i++) {
1463                 if (tcd->fail_count[i] > max_fails) {
1464                         max_pnn = i;
1465                         max_fails = tcd->fail_count[i];
1466                 }
1467         }
1468
1469         if (max_fails > 0) {
1470                 int ret;
1471                 TDB_DATA data;
1472
1473                 DEBUG(DEBUG_ERR,
1474                       ("Sending banning credits to %u with fail count %u\n",
1475                        max_pnn, max_fails));
1476
1477                 data.dptr = (uint8_t *)&max_pnn;
1478                 data.dsize = sizeof(uint32_t);
1479                 ret = ctdb_client_send_message(ctdb,
1480                                                CTDB_BROADCAST_CONNECTED,
1481                                                CTDB_SRVID_BANNING,
1482                                                data);
1483                 if (ret != 0) {
1484                         DEBUG(DEBUG_ERR,
1485                               ("Failed to set banning credits for node %u\n",
1486                                max_pnn));
1487                 }
1488         }
1489 }
1490
/*
 * Recalculate the allocation of public IPs to nodes and have the
 * nodes host their allocated addresses.
 *
 * - Initialise IP allocation state.  Pass:
 *   + algorithm to be used;
 *   + whether IP rebalancing ("failback") should be done (this uses a
 *     cluster-wide configuration variable and only the value from the
 *     master node is used); and
 *   + list of nodes to force rebalance (internal structure, currently
 *     no way to fetch, only used by LCP2 for nodes that have had new
 *     IP addresses added).
 * - Set IP flags for IP allocation based on node map and tunables
 *   NoIPTakeover/NoIPHostOnAllDisabled from all connected nodes
 *   (tunable fetching done separately so values can be faked in unit
 *   testing)
 * - Retrieve known and available IP addresses (done separately so
 *   values can be faked in unit testing)
 * - Use ipalloc_set_public_ips() to set known and available IP
 *   addresses for allocation
 * - If cluster can't host IP addresses then early exit
 * - Run IP allocation algorithm
 * - Send RELEASE_IP to all nodes for IPs they should not host
 * - Send TAKE_IP to all nodes for IPs they should host
 * - Send IPREALLOCATED to all nodes (with backward compatibility hack)
 */
1517 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
1518                       uint32_t *force_rebalance_nodes)
1519 {
1520         int i, ret;
1521         struct ctdb_public_ip ip;
1522         uint32_t *nodes;
1523         struct public_ip_list *all_ips, *tmp_ip;
1524         TDB_DATA data;
1525         struct timeval timeout;
1526         struct client_async_data *async_data;
1527         struct ctdb_client_control_state *state;
1528         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1529         struct ipalloc_state *ipalloc_state;
1530         struct ctdb_public_ip_list *known_ips, *available_ips;
1531         struct takeover_callback_data *takeover_data;
1532
1533         /* Initialise fail callback data to be used with
1534          * takeover_run_fail_callback().  A failure in any of the
1535          * following steps will cause an early return, so this can be
1536          * reused for each of those steps without re-initialising. */
1537         takeover_data = takeover_callback_data_init(tmp_ctx,
1538                                                     nodemap->num);
1539         if (takeover_data == NULL) {
1540                 talloc_free(tmp_ctx);
1541                 return -1;
1542         }
1543
1544         /* Each of the later stages (RELEASE_IP, TAKEOVER_IP,
1545          * IPREALLOCATED) notionally has a timeout of TakeoverTimeout
1546          * seconds.  However, RELEASE_IP can take longer due to TCP
1547          * connection killing, so sometimes needs more time.
1548          * Therefore, use a cumulative timeout of TakeoverTimeout * 3
1549          * seconds across all 3 stages.  No explicit expiry checks are
1550          * needed before each stage because tevent is smart enough to
1551          * fire the timeouts even if they are in the past.  Initialise
1552          * this here to cope with early jumps to IPREALLOCATED. */
1553         timeout = timeval_current_ofs(3 * ctdb->tunable.takeover_timeout,0);
1554
1555         /*
1556          * ip failover is completely disabled, just send out the 
1557          * ipreallocated event.
1558          */
1559         if (ctdb->tunable.disable_ip_failover != 0) {
1560                 goto ipreallocated;
1561         }
1562
1563         ipalloc_state = ipalloc_state_init(tmp_ctx, ctdb->num_nodes,
1564                                            determine_algorithm(&ctdb->tunable),
1565                                            (ctdb->tunable.no_ip_failback != 0),
1566                                            force_rebalance_nodes);
1567         if (ipalloc_state == NULL) {
1568                 talloc_free(tmp_ctx);
1569                 return -1;
1570         }
1571
1572         if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
1573                 DEBUG(DEBUG_ERR,
1574                       ("Failed to set IP flags - aborting takeover run\n"));
1575                 talloc_free(tmp_ctx);
1576                 return -1;
1577         }
1578
1579         /* Fetch known/available public IPs from each active node */
1580         /* Fetch lists of known public IPs from all nodes */
1581         known_ips = ctdb_fetch_remote_public_ips(ctdb, ipalloc_state,
1582                                                  nodemap, 0);
1583         if (known_ips == NULL) {
1584                 DEBUG(DEBUG_ERR, ("Failed to read known public IPs\n"));
1585                 talloc_free(tmp_ctx);
1586                 return -1;
1587         }
1588         available_ips = ctdb_fetch_remote_public_ips(
1589                 ctdb, ipalloc_state, nodemap,
1590                 CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE);
1591         if (available_ips == NULL) {
1592                 DEBUG(DEBUG_ERR, ("Failed to read available public IPs\n"));
1593                 talloc_free(tmp_ctx);
1594                 return -1;
1595         }
1596
1597         if (! ipalloc_set_public_ips(ipalloc_state, known_ips, available_ips)) {
1598                 DEBUG(DEBUG_ERR, ("Failed to set public IPs\n"));
1599                 talloc_free(tmp_ctx);
1600                 return -1;
1601         }
1602
1603         if (! ipalloc_can_host_ips(ipalloc_state)) {
1604                 DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
1605                 goto ipreallocated;
1606         }
1607
1608         /* Do the IP reassignment calculations */
1609         all_ips = ipalloc(ipalloc_state);
1610         if (all_ips == NULL) {
1611                 talloc_free(tmp_ctx);
1612                 return -1;
1613         }
1614
1615         /* Now tell all nodes to release any public IPs should not
1616          * host.  This will be a NOOP on nodes that don't currently
1617          * hold the given IP.
1618          */
1619         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1620         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1621
1622         async_data->fail_callback = takeover_run_fail_callback;
1623         async_data->callback_data = takeover_data;
1624
1625         ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
1626
1627         /* Send a RELEASE_IP to all nodes that should not be hosting
1628          * each IP.  For each IP, all but one of these will be
1629          * redundant.  However, the redundant ones are used to tell
1630          * nodes which node should be hosting the IP so that commands
1631          * like "ctdb ip" can display a particular nodes idea of who
1632          * is hosting what. */
1633         for (i=0;i<nodemap->num;i++) {
1634                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1635                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1636                         continue;
1637                 }
1638
1639                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1640                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1641                                 /* This node should be serving this
1642                                    vnn so don't tell it to release the ip
1643                                 */
1644                                 continue;
1645                         }
1646                         ip.pnn  = tmp_ip->pnn;
1647                         ip.addr = tmp_ip->addr;
1648
1649                         data.dsize = sizeof(ip);
1650                         data.dptr  = (uint8_t *)&ip;
1651                         state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1652                                                   0, CTDB_CONTROL_RELEASE_IP, 0,
1653                                                   data, async_data,
1654                                                   &timeout, NULL);
1655                         if (state == NULL) {
1656                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1657                                 talloc_free(tmp_ctx);
1658                                 return -1;
1659                         }
1660
1661                         ctdb_client_async_add(async_data, state);
1662                 }
1663         }
1664         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1665                 DEBUG(DEBUG_ERR,
1666                       ("Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1667                 goto fail;
1668         }
1669         talloc_free(async_data);
1670
1671
1672         /* For each IP, send a TAKOVER_IP to the node that should be
1673          * hosting it.  Many of these will often be redundant (since
1674          * the allocation won't have changed) but they can be useful
1675          * to recover from inconsistencies. */
1676         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1677         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1678
1679         async_data->fail_callback = takeover_run_fail_callback;
1680         async_data->callback_data = takeover_data;
1681
1682         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1683                 if (tmp_ip->pnn == -1) {
1684                         /* this IP won't be taken over */
1685                         continue;
1686                 }
1687
1688                 ip.pnn  = tmp_ip->pnn;
1689                 ip.addr = tmp_ip->addr;
1690
1691                 data.dsize = sizeof(ip);
1692                 data.dptr  = (uint8_t *)&ip;
1693                 state = ctdb_control_send(ctdb, tmp_ip->pnn,
1694                                           0, CTDB_CONTROL_TAKEOVER_IP, 0,
1695                                           data, async_data, &timeout, NULL);
1696                 if (state == NULL) {
1697                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1698                         talloc_free(tmp_ctx);
1699                         return -1;
1700                 }
1701
1702                 ctdb_client_async_add(async_data, state);
1703         }
1704         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1705                 DEBUG(DEBUG_ERR,
1706                       ("Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1707                 goto fail;
1708         }
1709
1710 ipreallocated:
1711         /*
1712          * Tell all nodes to run eventscripts to process the
1713          * "ipreallocated" event.  This can do a lot of things,
1714          * including restarting services to reconfigure them if public
1715          * IPs have moved.  Once upon a time this event only used to
1716          * update natgw.
1717          */
1718         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1719         ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
1720                                         nodes, 0, timeout,
1721                                         false, tdb_null,
1722                                         NULL, takeover_run_fail_callback,
1723                                         takeover_data);
1724         if (ret != 0) {
1725                 DEBUG(DEBUG_ERR,
1726                       ("Async CTDB_CONTROL_IPREALLOCATED control failed\n"));
1727                 goto fail;
1728         }
1729
1730         talloc_free(tmp_ctx);
1731         return ret;
1732
1733 fail:
1734         takeover_run_process_failures(ctdb, takeover_data);
1735         talloc_free(tmp_ctx);
1736         return -1;
1737 }
1738
1739
1740 /*
1741   destroy a ctdb_client_ip structure
1742  */
1743 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1744 {
1745         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1746                 ctdb_addr_to_str(&ip->addr),
1747                 ntohs(ip->addr.ip.sin_port),
1748                 ip->client_id));
1749
1750         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1751         return 0;
1752 }
1753
1754 /*
1755   called by a client to inform us of a TCP connection that it is managing
1756   that should tickled with an ACK when IP takeover is done
1757  */
1758 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1759                                 TDB_DATA indata)
1760 {
1761         struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
1762         struct ctdb_connection *tcp_sock = NULL;
1763         struct ctdb_tcp_list *tcp;
1764         struct ctdb_connection t;
1765         int ret;
1766         TDB_DATA data;
1767         struct ctdb_client_ip *ip;
1768         struct ctdb_vnn *vnn;
1769         ctdb_sock_addr addr;
1770
1771         /* If we don't have public IPs, tickles are useless */
1772         if (ctdb->vnn == NULL) {
1773                 return 0;
1774         }
1775
1776         tcp_sock = (struct ctdb_connection *)indata.dptr;
1777
1778         addr = tcp_sock->src;
1779         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1780         addr = tcp_sock->dst;
1781         ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
1782
1783         ZERO_STRUCT(addr);
1784         memcpy(&addr, &tcp_sock->dst, sizeof(addr));
1785         vnn = find_public_ip_vnn(ctdb, &addr);
1786         if (vnn == NULL) {
1787                 switch (addr.sa.sa_family) {
1788                 case AF_INET:
1789                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1790                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1791                                         ctdb_addr_to_str(&addr)));
1792                         }
1793                         break;
1794                 case AF_INET6:
1795                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1796                                 ctdb_addr_to_str(&addr)));
1797                         break;
1798                 default:
1799                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1800                 }
1801
1802                 return 0;
1803         }
1804
1805         if (vnn->pnn != ctdb->pnn) {
1806                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1807                         ctdb_addr_to_str(&addr),
1808                         client_id, client->pid));
1809                 /* failing this call will tell smbd to die */
1810                 return -1;
1811         }
1812
1813         ip = talloc(client, struct ctdb_client_ip);
1814         CTDB_NO_MEMORY(ctdb, ip);
1815
1816         ip->ctdb      = ctdb;
1817         ip->addr      = addr;
1818         ip->client_id = client_id;
1819         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1820         DLIST_ADD(ctdb->client_ip_list, ip);
1821
1822         tcp = talloc(client, struct ctdb_tcp_list);
1823         CTDB_NO_MEMORY(ctdb, tcp);
1824
1825         tcp->connection.src = tcp_sock->src;
1826         tcp->connection.dst = tcp_sock->dst;
1827
1828         DLIST_ADD(client->tcp_list, tcp);
1829
1830         t.src = tcp_sock->src;
1831         t.dst = tcp_sock->dst;
1832
1833         data.dptr = (uint8_t *)&t;
1834         data.dsize = sizeof(t);
1835
1836         switch (addr.sa.sa_family) {
1837         case AF_INET:
1838                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1839                         (unsigned)ntohs(tcp_sock->dst.ip.sin_port),
1840                         ctdb_addr_to_str(&tcp_sock->src),
1841                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1842                 break;
1843         case AF_INET6:
1844                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1845                         (unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
1846                         ctdb_addr_to_str(&tcp_sock->src),
1847                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1848                 break;
1849         default:
1850                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1851         }
1852
1853
1854         /* tell all nodes about this tcp connection */
1855         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1856                                        CTDB_CONTROL_TCP_ADD,
1857                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1858         if (ret != 0) {
1859                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1860                 return -1;
1861         }
1862
1863         return 0;
1864 }
1865
1866 /*
1867   find a tcp address on a list
1868  */
1869 static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
1870                                            struct ctdb_connection *tcp)
1871 {
1872         int i;
1873
1874         if (array == NULL) {
1875                 return NULL;
1876         }
1877
1878         for (i=0;i<array->num;i++) {
1879                 if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
1880                     ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
1881                         return &array->connections[i];
1882                 }
1883         }
1884         return NULL;
1885 }
1886
1887
1888
1889 /*
1890   called by a daemon to inform us of a TCP connection that one of its
1891   clients managing that should tickled with an ACK when IP takeover is
1892   done
1893  */
1894 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1895 {
1896         struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
1897         struct ctdb_tcp_array *tcparray;
1898         struct ctdb_connection tcp;
1899         struct ctdb_vnn *vnn;
1900
1901         /* If we don't have public IPs, tickles are useless */
1902         if (ctdb->vnn == NULL) {
1903                 return 0;
1904         }
1905
1906         vnn = find_public_ip_vnn(ctdb, &p->dst);
1907         if (vnn == NULL) {
1908                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1909                         ctdb_addr_to_str(&p->dst)));
1910
1911                 return -1;
1912         }
1913
1914
1915         tcparray = vnn->tcp_array;
1916
1917         /* If this is the first tickle */
1918         if (tcparray == NULL) {
1919                 tcparray = talloc(vnn, struct ctdb_tcp_array);
1920                 CTDB_NO_MEMORY(ctdb, tcparray);
1921                 vnn->tcp_array = tcparray;
1922
1923                 tcparray->num = 0;
1924                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
1925                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1926
1927                 tcparray->connections[tcparray->num].src = p->src;
1928                 tcparray->connections[tcparray->num].dst = p->dst;
1929                 tcparray->num++;
1930
1931                 if (tcp_update_needed) {
1932                         vnn->tcp_update_needed = true;
1933                 }
1934                 return 0;
1935         }
1936
1937
1938         /* Do we already have this tickle ?*/
1939         tcp.src = p->src;
1940         tcp.dst = p->dst;
1941         if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
1942                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1943                         ctdb_addr_to_str(&tcp.dst),
1944                         ntohs(tcp.dst.ip.sin_port),
1945                         vnn->pnn));
1946                 return 0;
1947         }
1948
1949         /* A new tickle, we must add it to the array */
1950         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1951                                         struct ctdb_connection,
1952                                         tcparray->num+1);
1953         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1954
1955         tcparray->connections[tcparray->num].src = p->src;
1956         tcparray->connections[tcparray->num].dst = p->dst;
1957         tcparray->num++;
1958
1959         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1960                 ctdb_addr_to_str(&tcp.dst),
1961                 ntohs(tcp.dst.ip.sin_port),
1962                 vnn->pnn));
1963
1964         if (tcp_update_needed) {
1965                 vnn->tcp_update_needed = true;
1966         }
1967
1968         return 0;
1969 }
1970
1971
1972 static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
1973 {
1974         struct ctdb_connection *tcpp;
1975
1976         if (vnn == NULL) {
1977                 return;
1978         }
1979
1980         /* if the array is empty we cant remove it
1981            and we don't need to do anything
1982          */
1983         if (vnn->tcp_array == NULL) {
1984                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1985                         ctdb_addr_to_str(&conn->dst),
1986                         ntohs(conn->dst.ip.sin_port)));
1987                 return;
1988         }
1989
1990
1991         /* See if we know this connection
1992            if we don't know this connection  then we dont need to do anything
1993          */
1994         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1995         if (tcpp == NULL) {
1996                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1997                         ctdb_addr_to_str(&conn->dst),
1998                         ntohs(conn->dst.ip.sin_port)));
1999                 return;
2000         }
2001
2002
2003         /* We need to remove this entry from the array.
2004            Instead of allocating a new array and copying data to it
2005            we cheat and just copy the last entry in the existing array
2006            to the entry that is to be removed and just shring the 
2007            ->num field
2008          */
2009         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
2010         vnn->tcp_array->num--;
2011
2012         /* If we deleted the last entry we also need to remove the entire array
2013          */
2014         if (vnn->tcp_array->num == 0) {
2015                 talloc_free(vnn->tcp_array);
2016                 vnn->tcp_array = NULL;
2017         }               
2018
2019         vnn->tcp_update_needed = true;
2020
2021         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
2022                 ctdb_addr_to_str(&conn->src),
2023                 ntohs(conn->src.ip.sin_port)));
2024 }
2025
2026
2027 /*
2028   called by a daemon to inform us of a TCP connection that one of its
2029   clients used are no longer needed in the tickle database
2030  */
2031 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
2032 {
2033         struct ctdb_vnn *vnn;
2034         struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
2035
2036         /* If we don't have public IPs, tickles are useless */
2037         if (ctdb->vnn == NULL) {
2038                 return 0;
2039         }
2040
2041         vnn = find_public_ip_vnn(ctdb, &conn->dst);
2042         if (vnn == NULL) {
2043                 DEBUG(DEBUG_ERR,
2044                       (__location__ " unable to find public address %s\n",
2045                        ctdb_addr_to_str(&conn->dst)));
2046                 return 0;
2047         }
2048
2049         ctdb_remove_connection(vnn, conn);
2050
2051         return 0;
2052 }
2053
2054
2055 /*
2056   Called when another daemon starts - causes all tickles for all
2057   public addresses we are serving to be sent to the new node on the
2058   next check.  This actually causes the next scheduled call to
2059   tdb_update_tcp_tickles() to update all nodes.  This is simple and
2060   doesn't require careful error handling.
2061  */
2062 int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
2063 {
2064         struct ctdb_vnn *vnn;
2065
2066         DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
2067                            (unsigned long) pnn));
2068
2069         for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
2070                 vnn->tcp_update_needed = true;
2071         }
2072
2073         return 0;
2074 }
2075
2076
2077 /*
2078   called when a client structure goes away - hook to remove
2079   elements from the tcp_list in all daemons
2080  */
2081 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
2082 {
2083         while (client->tcp_list) {
2084                 struct ctdb_vnn *vnn;
2085                 struct ctdb_tcp_list *tcp = client->tcp_list;
2086                 struct ctdb_connection *conn = &tcp->connection;
2087
2088                 DLIST_REMOVE(client->tcp_list, tcp);
2089
2090                 vnn = find_public_ip_vnn(client->ctdb,
2091                                          &conn->dst);
2092                 if (vnn == NULL) {
2093                         DEBUG(DEBUG_ERR,
2094                               (__location__ " unable to find public address %s\n",
2095                                ctdb_addr_to_str(&conn->dst)));
2096                         continue;
2097                 }
2098
2099                 /* If the IP address is hosted on this node then
2100                  * remove the connection. */
2101                 if (vnn->pnn == client->ctdb->pnn) {
2102                         ctdb_remove_connection(vnn, conn);
2103                 }
2104
2105                 /* Otherwise this function has been called because the
2106                  * server IP address has been released to another node
2107                  * and the client has exited.  This means that we
2108                  * should not delete the connection information.  The
2109                  * takeover node processes connections too. */
2110         }
2111 }
2112
2113
2114 void ctdb_release_all_ips(struct ctdb_context *ctdb)
2115 {
2116         struct ctdb_vnn *vnn;
2117         int count = 0;
2118         TDB_DATA data;
2119
2120         if (ctdb->tunable.disable_ip_failover == 1) {
2121                 return;
2122         }
2123
2124         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2125                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
2126                         ctdb_vnn_unassign_iface(ctdb, vnn);
2127                         continue;
2128                 }
2129                 if (!vnn->iface) {
2130                         continue;
2131                 }
2132
2133                 /* Don't allow multiple releases at once.  Some code,
2134                  * particularly ctdb_tickle_sentenced_connections() is
2135                  * not re-entrant */
2136                 if (vnn->update_in_flight) {
2137                         DEBUG(DEBUG_WARNING,
2138                               (__location__
2139                                " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
2140                                     ctdb_addr_to_str(&vnn->public_address),
2141                                     vnn->public_netmask_bits,
2142                                     ctdb_vnn_iface_string(vnn)));
2143                         continue;
2144                 }
2145                 vnn->update_in_flight = true;
2146
2147                 DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
2148                                     ctdb_addr_to_str(&vnn->public_address),
2149                                     vnn->public_netmask_bits,
2150                                     ctdb_vnn_iface_string(vnn)));
2151
2152                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
2153                                   ctdb_vnn_iface_string(vnn),
2154                                   ctdb_addr_to_str(&vnn->public_address),
2155                                   vnn->public_netmask_bits);
2156
2157                 data.dptr = (uint8_t *)talloc_strdup(
2158                                 vnn, ctdb_addr_to_str(&vnn->public_address));
2159                 if (data.dptr != NULL) {
2160                         data.dsize = strlen((char *)data.dptr) + 1;
2161                         ctdb_daemon_send_message(ctdb, ctdb->pnn,
2162                                                  CTDB_SRVID_RELEASE_IP, data);
2163                         talloc_free(data.dptr);
2164                 }
2165
2166                 ctdb_vnn_unassign_iface(ctdb, vnn);
2167                 vnn->update_in_flight = false;
2168                 count++;
2169         }
2170
2171         DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
2172 }
2173
2174
2175 /*
2176   get list of public IPs
2177  */
2178 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
2179                                     struct ctdb_req_control_old *c, TDB_DATA *outdata)
2180 {
2181         int i, num, len;
2182         struct ctdb_public_ip_list_old *ips;
2183         struct ctdb_vnn *vnn;
2184         bool only_available = false;
2185
2186         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
2187                 only_available = true;
2188         }
2189
2190         /* count how many public ip structures we have */
2191         num = 0;
2192         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2193                 num++;
2194         }
2195
2196         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2197                 num*sizeof(struct ctdb_public_ip);
2198         ips = talloc_zero_size(outdata, len);
2199         CTDB_NO_MEMORY(ctdb, ips);
2200
2201         i = 0;
2202         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2203                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2204                         continue;
2205                 }
2206                 ips->ips[i].pnn  = vnn->pnn;
2207                 ips->ips[i].addr = vnn->public_address;
2208                 i++;
2209         }
2210         ips->num = i;
2211         len = offsetof(struct ctdb_public_ip_list_old, ips) +
2212                 i*sizeof(struct ctdb_public_ip);
2213
2214         outdata->dsize = len;
2215         outdata->dptr  = (uint8_t *)ips;
2216
2217         return 0;
2218 }
2219
2220
2221 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2222                                         struct ctdb_req_control_old *c,
2223                                         TDB_DATA indata,
2224                                         TDB_DATA *outdata)
2225 {
2226         int i, num, len;
2227         ctdb_sock_addr *addr;
2228         struct ctdb_public_ip_info_old *info;
2229         struct ctdb_vnn *vnn;
2230
2231         addr = (ctdb_sock_addr *)indata.dptr;
2232
2233         vnn = find_public_ip_vnn(ctdb, addr);
2234         if (vnn == NULL) {
2235                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2236                                  "'%s'not a public address\n",
2237                                  ctdb_addr_to_str(addr)));
2238                 return -1;
2239         }
2240
2241         /* count how many public ip structures we have */
2242         num = 0;
2243         for (;vnn->ifaces[num];) {
2244                 num++;
2245         }
2246
2247         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2248                 num*sizeof(struct ctdb_iface);
2249         info = talloc_zero_size(outdata, len);
2250         CTDB_NO_MEMORY(ctdb, info);
2251
2252         info->ip.addr = vnn->public_address;
2253         info->ip.pnn = vnn->pnn;
2254         info->active_idx = 0xFFFFFFFF;
2255
2256         for (i=0; vnn->ifaces[i]; i++) {
2257                 struct ctdb_interface *cur;
2258
2259                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2260                 if (cur == NULL) {
2261                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2262                                            vnn->ifaces[i]));
2263                         return -1;
2264                 }
2265                 if (vnn->iface == cur) {
2266                         info->active_idx = i;
2267                 }
2268                 strncpy(info->ifaces[i].name, cur->name,
2269                         sizeof(info->ifaces[i].name));
2270                 info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
2271                 info->ifaces[i].link_state = cur->link_up;
2272                 info->ifaces[i].references = cur->references;
2273         }
2274         info->num = i;
2275         len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
2276                 i*sizeof(struct ctdb_iface);
2277
2278         outdata->dsize = len;
2279         outdata->dptr  = (uint8_t *)info;
2280
2281         return 0;
2282 }
2283
2284 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2285                                 struct ctdb_req_control_old *c,
2286                                 TDB_DATA *outdata)
2287 {
2288         int i, num, len;
2289         struct ctdb_iface_list_old *ifaces;
2290         struct ctdb_interface *cur;
2291
2292         /* count how many public ip structures we have */
2293         num = 0;
2294         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2295                 num++;
2296         }
2297
2298         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2299                 num*sizeof(struct ctdb_iface);
2300         ifaces = talloc_zero_size(outdata, len);
2301         CTDB_NO_MEMORY(ctdb, ifaces);
2302
2303         i = 0;
2304         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2305                 strncpy(ifaces->ifaces[i].name, cur->name,
2306                         sizeof(ifaces->ifaces[i].name));
2307                 ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
2308                 ifaces->ifaces[i].link_state = cur->link_up;
2309                 ifaces->ifaces[i].references = cur->references;
2310                 i++;
2311         }
2312         ifaces->num = i;
2313         len = offsetof(struct ctdb_iface_list_old, ifaces) +
2314                 i*sizeof(struct ctdb_iface);
2315
2316         outdata->dsize = len;
2317         outdata->dptr  = (uint8_t *)ifaces;
2318
2319         return 0;
2320 }
2321
2322 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2323                                     struct ctdb_req_control_old *c,
2324                                     TDB_DATA indata)
2325 {
2326         struct ctdb_iface *info;
2327         struct ctdb_interface *iface;
2328         bool link_up = false;
2329
2330         info = (struct ctdb_iface *)indata.dptr;
2331
2332         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2333                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2334                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2335                                   len, len, info->name));
2336                 return -1;
2337         }
2338
2339         switch (info->link_state) {
2340         case 0:
2341                 link_up = false;
2342                 break;
2343         case 1:
2344                 link_up = true;
2345                 break;
2346         default:
2347                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2348                                   (unsigned int)info->link_state));
2349                 return -1;
2350         }
2351
2352         if (info->references != 0) {
2353                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2354                                   (unsigned int)info->references));
2355                 return -1;
2356         }
2357
2358         iface = ctdb_find_iface(ctdb, info->name);
2359         if (iface == NULL) {
2360                 return -1;
2361         }
2362
2363         if (link_up == iface->link_up) {
2364                 return 0;
2365         }
2366
2367         DEBUG(DEBUG_ERR,
2368               ("iface[%s] has changed it's link status %s => %s\n",
2369                iface->name,
2370                iface->link_up?"up":"down",
2371                link_up?"up":"down"));
2372
2373         iface->link_up = link_up;
2374         return 0;
2375 }
2376
2377
2378 /*
2379   called by a daemon to inform us of the entire list of TCP tickles for
2380   a particular public address.
2381   this control should only be sent by the node that is currently serving
2382   that public address.
2383  */
2384 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2385 {
2386         struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
2387         struct ctdb_tcp_array *tcparray;
2388         struct ctdb_vnn *vnn;
2389
2390         /* We must at least have tickles.num or else we cant verify the size
2391            of the received data blob
2392          */
2393         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
2394                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
2395                 return -1;
2396         }
2397
2398         /* verify that the size of data matches what we expect */
2399         if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
2400                          + sizeof(struct ctdb_connection) * list->num) {
2401                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
2402                 return -1;
2403         }
2404
2405         DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
2406                            ctdb_addr_to_str(&list->addr)));
2407
2408         vnn = find_public_ip_vnn(ctdb, &list->addr);
2409         if (vnn == NULL) {
2410                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
2411                         ctdb_addr_to_str(&list->addr)));
2412
2413                 return 1;
2414         }
2415
2416         if (vnn->pnn == ctdb->pnn) {
2417                 DEBUG(DEBUG_INFO,
2418                       ("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
2419                        ctdb_addr_to_str(&list->addr)));
2420                 return 0;
2421         }
2422
2423         /* remove any old ticklelist we might have */
2424         talloc_free(vnn->tcp_array);
2425         vnn->tcp_array = NULL;
2426
2427         tcparray = talloc(vnn, struct ctdb_tcp_array);
2428         CTDB_NO_MEMORY(ctdb, tcparray);
2429
2430         tcparray->num = list->num;
2431
2432         tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
2433         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2434
2435         memcpy(tcparray->connections, &list->connections[0],
2436                sizeof(struct ctdb_connection)*tcparray->num);
2437
2438         /* We now have a new fresh tickle list array for this vnn */
2439         vnn->tcp_array = tcparray;
2440
2441         return 0;
2442 }
2443
2444 /*
2445   called to return the full list of tickles for the puclic address associated 
2446   with the provided vnn
2447  */
2448 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2449 {
2450         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2451         struct ctdb_tickle_list_old *list;
2452         struct ctdb_tcp_array *tcparray;
2453         int num, i;
2454         struct ctdb_vnn *vnn;
2455         unsigned port;
2456
2457         vnn = find_public_ip_vnn(ctdb, addr);
2458         if (vnn == NULL) {
2459                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
2460                         ctdb_addr_to_str(addr)));
2461
2462                 return 1;
2463         }
2464
2465         port = ctdb_addr_to_port(addr);
2466
2467         tcparray = vnn->tcp_array;
2468         num = 0;
2469         if (tcparray != NULL) {
2470                 if (port == 0) {
2471                         /* All connections */
2472                         num = tcparray->num;
2473                 } else {
2474                         /* Count connections for port */
2475                         for (i = 0; i < tcparray->num; i++) {
2476                                 if (port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2477                                         num++;
2478                                 }
2479                         }
2480                 }
2481         }
2482
2483         outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
2484                         + sizeof(struct ctdb_connection) * num;
2485
2486         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2487         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2488         list = (struct ctdb_tickle_list_old *)outdata->dptr;
2489
2490         list->addr = *addr;
2491         list->num = num;
2492
2493         if (num == 0) {
2494                 return 0;
2495         }
2496
2497         num = 0;
2498         for (i = 0; i < tcparray->num; i++) {
2499                 if (port == 0 || \
2500                     port == ctdb_addr_to_port(&tcparray->connections[i].dst)) {
2501                         list->connections[num] = tcparray->connections[i];
2502                         num++;
2503                 }
2504         }
2505
2506         return 0;
2507 }
2508
2509
2510 /*
2511   set the list of all tcp tickles for a public address
2512  */
2513 static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
2514                                             ctdb_sock_addr *addr,
2515                                             struct ctdb_tcp_array *tcparray)
2516 {
2517         int ret, num;
2518         TDB_DATA data;
2519         struct ctdb_tickle_list_old *list;
2520
2521         if (tcparray) {
2522                 num = tcparray->num;
2523         } else {
2524                 num = 0;
2525         }
2526
2527         data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
2528                         sizeof(struct ctdb_connection) * num;
2529         data.dptr = talloc_size(ctdb, data.dsize);
2530         CTDB_NO_MEMORY(ctdb, data.dptr);
2531
2532         list = (struct ctdb_tickle_list_old *)data.dptr;
2533         list->addr = *addr;
2534         list->num = num;
2535         if (tcparray) {
2536                 memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
2537         }
2538
2539         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
2540                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2541                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2542         if (ret != 0) {
2543                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2544                 return -1;
2545         }
2546
2547         talloc_free(data.dptr);
2548
2549         return ret;
2550 }
2551
2552
2553 /*
2554   perform tickle updates if required
2555  */
2556 static void ctdb_update_tcp_tickles(struct tevent_context *ev,
2557                                     struct tevent_timer *te,
2558                                     struct timeval t, void *private_data)
2559 {
2560         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2561         int ret;
2562         struct ctdb_vnn *vnn;
2563
2564         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2565                 /* we only send out updates for public addresses that 
2566                    we have taken over
2567                  */
2568                 if (ctdb->pnn != vnn->pnn) {
2569                         continue;
2570                 }
2571                 /* We only send out the updates if we need to */
2572                 if (!vnn->tcp_update_needed) {
2573                         continue;
2574                 }
2575                 ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
2576                                                        &vnn->public_address,
2577                                                        vnn->tcp_array);
2578                 if (ret != 0) {
2579                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2580                                 ctdb_addr_to_str(&vnn->public_address)));
2581                 } else {
2582                         DEBUG(DEBUG_INFO,
2583                               ("Sent tickle update for public address %s\n",
2584                                ctdb_addr_to_str(&vnn->public_address)));
2585                         vnn->tcp_update_needed = false;
2586                 }
2587         }
2588
2589         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2590                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2591                          ctdb_update_tcp_tickles, ctdb);
2592 }
2593
2594 /*
2595   start periodic update of tcp tickles
2596  */
2597 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2598 {
2599         ctdb->tickle_update_context = talloc_new(ctdb);
2600
2601         tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
2602                          timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
2603                          ctdb_update_tcp_tickles, ctdb);
2604 }
2605
2606
2607
2608
2609 struct control_gratious_arp {
2610         struct ctdb_context *ctdb;
2611         ctdb_sock_addr addr;
2612         const char *iface;
2613         int count;
2614 };
2615
2616 /*
2617   send a control_gratuitous arp
2618  */
2619 static void send_gratious_arp(struct tevent_context *ev,
2620                               struct tevent_timer *te,
2621                               struct timeval t, void *private_data)
2622 {
2623         int ret;
2624         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2625                                                         struct control_gratious_arp);
2626
2627         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2628         if (ret != 0) {
2629                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2630                                  arp->iface, strerror(errno)));
2631         }
2632
2633
2634         arp->count++;
2635         if (arp->count == CTDB_ARP_REPEAT) {
2636                 talloc_free(arp);
2637                 return;
2638         }
2639
2640         tevent_add_timer(arp->ctdb->ev, arp,
2641                          timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
2642                          send_gratious_arp, arp);
2643 }
2644
2645
2646 /*
2647   send a gratious arp 
2648  */
2649 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2650 {
2651         struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
2652         struct control_gratious_arp *arp;
2653
2654         /* verify the size of indata */
2655         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2656                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2657                                  (unsigned)indata.dsize, 
2658                                  (unsigned)offsetof(struct ctdb_addr_info_old, iface)));
2659                 return -1;
2660         }
2661         if (indata.dsize != 
2662                 ( offsetof(struct ctdb_addr_info_old, iface)
2663                 + gratious_arp->len ) ){
2664
2665                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2666                         "but should be %u bytes\n", 
2667                          (unsigned)indata.dsize, 
2668                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
2669                 return -1;
2670         }
2671
2672
2673         arp = talloc(ctdb, struct control_gratious_arp);
2674         CTDB_NO_MEMORY(ctdb, arp);
2675
2676         arp->ctdb  = ctdb;
2677         arp->addr   = gratious_arp->addr;
2678         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2679         CTDB_NO_MEMORY(ctdb, arp->iface);
2680         arp->count = 0;
2681
2682         tevent_add_timer(arp->ctdb->ev, arp,
2683                          timeval_zero(), send_gratious_arp, arp);
2684
2685         return 0;
2686 }
2687
2688 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2689 {
2690         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2691         int ret;
2692
2693         /* verify the size of indata */
2694         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2695                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2696                 return -1;
2697         }
2698         if (indata.dsize != 
2699                 ( offsetof(struct ctdb_addr_info_old, iface)
2700                 + pub->len ) ){
2701
2702                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2703                         "but should be %u bytes\n", 
2704                          (unsigned)indata.dsize, 
2705                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2706                 return -1;
2707         }
2708
2709         DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));
2710
2711         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
2712
2713         if (ret != 0) {
2714                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2715                 return -1;
2716         }
2717
2718         return 0;
2719 }
2720
2721 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2722 {
2723         struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
2724         struct ctdb_vnn *vnn;
2725
2726         /* verify the size of indata */
2727         if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
2728                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
2729                 return -1;
2730         }
2731         if (indata.dsize != 
2732                 ( offsetof(struct ctdb_addr_info_old, iface)
2733                 + pub->len ) ){
2734
2735                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2736                         "but should be %u bytes\n", 
2737                          (unsigned)indata.dsize, 
2738                          (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
2739                 return -1;
2740         }
2741
2742         DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
2743
2744         /* walk over all public addresses until we find a match */
2745         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2746                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2747                         if (vnn->pnn == ctdb->pnn) {
2748                                 /* This IP is currently being hosted.
2749                                  * Defer the deletion until the next
2750                                  * takeover run. "ctdb reloadips" will
2751                                  * always cause a takeover run.  "ctdb
2752                                  * delip" will now need an explicit
2753                                  * "ctdb ipreallocated" afterwards. */
2754                                 vnn->delete_pending = true;
2755                         } else {
2756                                 /* This IP is not hosted on the
2757                                  * current node so just delete it
2758                                  * now. */
2759                                 do_delete_ip(ctdb, vnn);
2760                         }
2761
2762                         return 0;
2763                 }
2764         }
2765
2766         DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
2767                          ctdb_addr_to_str(&pub->addr)));
2768         return -1;
2769 }
2770
2771
/* state carried from ctdb_control_ipreallocated() to its callback */
struct ipreallocated_callback_state {
        /* the control request that is answered once the event has run */
        struct ctdb_req_control_old *c;
};
2775
2776 static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
2777                                         int status, void *p)
2778 {
2779         struct ipreallocated_callback_state *state =
2780                 talloc_get_type(p, struct ipreallocated_callback_state);
2781
2782         if (status != 0) {
2783                 DEBUG(DEBUG_ERR,
2784                       (" \"ipreallocated\" event script failed (status %d)\n",
2785                        status));
2786                 if (status == -ETIME) {
2787                         ctdb_ban_self(ctdb);
2788                 }
2789         }
2790
2791         ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
2792         talloc_free(state);
2793 }
2794
2795 /* A control to run the ipreallocated event */
2796 int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
2797                                    struct ctdb_req_control_old *c,
2798                                    bool *async_reply)
2799 {
2800         int ret;
2801         struct ipreallocated_callback_state *state;
2802
2803         state = talloc(ctdb, struct ipreallocated_callback_state);
2804         CTDB_NO_MEMORY(ctdb, state);
2805
2806         DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
2807
2808         ret = ctdb_event_script_callback(ctdb, state,
2809                                          ctdb_ipreallocated_callback, state,
2810                                          CTDB_EVENT_IPREALLOCATED,
2811                                          "%s", "");
2812
2813         if (ret != 0) {
2814                 DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
2815                 talloc_free(state);
2816                 return -1;
2817         }
2818
2819         /* tell the control that we will be reply asynchronously */
2820         state->c    = talloc_steal(state, c);
2821         *async_reply = true;
2822
2823         return 0;
2824 }
2825
2826
/* state of one in-flight "reload public ips" request */
struct ctdb_reloadips_handle {
        /* daemon context the request belongs to */
        struct ctdb_context *ctdb;
        /* control to answer when the handle is destroyed; NULL if none */
        struct ctdb_req_control_old *c;
        /* status sent back in the reply */
        int status;
        /* pipe: the child writes its result to fd[1], the parent reads
           it from fd[0] */
        int fd[2];
        /* pid of the reloadips child; killed by the destructor */
        pid_t child;
        /* fd event watching fd[0] for the child's result */
        struct tevent_fd *fde;
};
2835
2836 static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
2837 {
2838         if (h == h->ctdb->reload_ips) {
2839                 h->ctdb->reload_ips = NULL;
2840         }
2841         if (h->c != NULL) {
2842                 ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
2843                 h->c = NULL;
2844         }
2845         ctdb_kill(h->ctdb, h->child, SIGKILL);
2846         return 0;
2847 }
2848
2849 static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
2850                                          struct tevent_timer *te,
2851                                          struct timeval t, void *private_data)
2852 {
2853         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2854
2855         talloc_free(h);
2856 }
2857
2858 static void ctdb_reloadips_child_handler(struct tevent_context *ev,
2859                                          struct tevent_fd *fde,
2860                                          uint16_t flags, void *private_data)
2861 {
2862         struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
2863
2864         char res;
2865         int ret;
2866
2867         ret = sys_read(h->fd[0], &res, 1);
2868         if (ret < 1 || res != 0) {
2869                 DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
2870                 res = 1;
2871         }
2872         h->status = res;
2873
2874         talloc_free(h);
2875 }
2876
2877 static int ctdb_reloadips_child(struct ctdb_context *ctdb)
2878 {
2879         TALLOC_CTX *mem_ctx = talloc_new(NULL);
2880         struct ctdb_public_ip_list_old *ips;
2881         struct ctdb_vnn *vnn;
2882         struct client_async_data *async_data;
2883         struct timeval timeout;
2884         TDB_DATA data;
2885         struct ctdb_client_control_state *state;
2886         bool first_add;
2887         int i, ret;
2888
2889         CTDB_NO_MEMORY(ctdb, mem_ctx);
2890
2891         /* Read IPs from local node */
2892         ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
2893                                        CTDB_CURRENT_NODE, mem_ctx, &ips);
2894         if (ret != 0) {
2895                 DEBUG(DEBUG_ERR,
2896                       ("Unable to fetch public IPs from local node\n"));
2897                 talloc_free(mem_ctx);
2898                 return -1;
2899         }
2900
2901         /* Read IPs file - this is safe since this is a child process */
2902         ctdb->vnn = NULL;
2903         if (ctdb_set_public_addresses(ctdb, false) != 0) {
2904                 DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
2905                 talloc_free(mem_ctx);
2906                 return -1;
2907         }
2908
2909         async_data = talloc_zero(mem_ctx, struct client_async_data);
2910         CTDB_NO_MEMORY(ctdb, async_data);
2911
2912         /* Compare IPs between node and file for IPs to be deleted */
2913         for (i = 0; i < ips->num; i++) {
2914                 /* */
2915                 for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2916                         if (ctdb_same_ip(&vnn->public_address,
2917                                          &ips->ips[i].addr)) {
2918                                 /* IP is still in file */
2919                                 break;
2920                         }
2921                 }
2922
2923                 if (vnn == NULL) {
2924                         /* Delete IP ips->ips[i] */
2925                         struct ctdb_addr_info_old *pub;
2926
2927                         DEBUG(DEBUG_NOTICE,
2928                               ("IP %s no longer configured, deleting it\n",
2929                                ctdb_addr_to_str(&ips->ips[i].addr)));
2930
2931                         pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
2932                         CTDB_NO_MEMORY(ctdb, pub);
2933
2934                         pub->addr  = ips->ips[i].addr;
2935                         pub->mask  = 0;
2936                         pub->len   = 0;
2937
2938                         timeout = TAKEOVER_TIMEOUT();
2939
2940                         data.dsize = offsetof(struct ctdb_addr_info_old,
2941                                               iface) + pub->len;
2942                         data.dptr = (uint8_t *)pub;
2943
2944                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
2945                                                   CTDB_CONTROL_DEL_PUBLIC_IP,
2946                                                   0, data, async_data,
2947                                                   &timeout, NULL);
2948                         if (state == NULL) {
2949                                 DEBUG(DEBUG_ERR,
2950                                       (__location__
2951                                        " failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
2952                                 goto failed;
2953                         }
2954
2955                         ctdb_client_async_add(async_data, state);
2956                 }
2957         }
2958
2959         /* Compare IPs between node and file for IPs to be added */
2960         first_add = true;
2961         for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
2962                 for (i = 0; i < ips->num; i++) {
2963                         if (ctdb_same_ip(&vnn->public_address,
2964                                          &ips->ips[i].addr)) {
2965                                 /* IP already on node */
2966                                 break;
2967                         }
2968                 }
2969                 if (i == ips->num) {
2970                         /* Add IP ips->ips[i] */
2971                         struct ctdb_addr_info_old *pub;
2972                         const char *ifaces = NULL;
2973                         uint32_t len;
2974                         int iface = 0;
2975
2976                         DEBUG(DEBUG_NOTICE,
2977                               ("New IP %s configured, adding it\n",
2978                                ctdb_addr_to_str(&vnn->public_address)));
2979                         if (first_add) {
2980                                 uint32_t pnn = ctdb_get_pnn(ctdb);
2981
2982                                 data.dsize = sizeof(pnn);
2983                                 data.dptr  = (uint8_t *)&pnn;
2984
2985                                 ret = ctdb_client_send_message(
2986                                         ctdb,
2987                                         CTDB_BROADCAST_CONNECTED,
2988                                         CTDB_SRVID_REBALANCE_NODE,
2989                                         data);
2990                                 if (ret != 0) {
2991                                         DEBUG(DEBUG_WARNING,
2992                                               ("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
2993                                 }
2994
2995                                 first_add = false;
2996                         }
2997
2998                         ifaces = vnn->ifaces[0];
2999                         iface = 1;
3000                         while (vnn->ifaces[iface] != NULL) {
3001                                 ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
3002                                                          vnn->ifaces[iface]);
3003                                 iface++;
3004                         }
3005
3006                         len   = strlen(ifaces) + 1;
3007                         pub = talloc_zero_size(mem_ctx,
3008                                                offsetof(struct ctdb_addr_info_old, iface) + len);
3009                         CTDB_NO_MEMORY(ctdb, pub);
3010
3011                         pub->addr  = vnn->public_address;
3012                         pub->mask  = vnn->public_netmask_bits;
3013                         pub->len   = len;
3014                         memcpy(&pub->iface[0], ifaces, pub->len);
3015
3016                         timeout = TAKEOVER_TIMEOUT();
3017
3018                         data.dsize = offsetof(struct ctdb_addr_info_old,
3019                                               iface) + pub->len;
3020                         data.dptr = (uint8_t *)pub;
3021
3022                         state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
3023                                                   CTDB_CONTROL_ADD_PUBLIC_IP,
3024                                                   0, data, async_data,
3025                                                   &timeout, NULL);
3026                         if (state == NULL) {
3027                                 DEBUG(DEBUG_ERR,
3028                                       (__location__
3029                                        " failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
3030                                 goto failed;
3031                         }
3032
3033                         ctdb_client_async_add(async_data, state);
3034                 }
3035         }
3036
3037         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3038                 DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
3039                 goto failed;
3040         }
3041
3042         talloc_free(mem_ctx);
3043         return 0;
3044
3045 failed:
3046         talloc_free(mem_ctx);
3047         return -1;
3048 }
3049
3050 /* This control is sent to force the node to re-read the public addresses file
3051    and drop any addresses we should nnot longer host, and add new addresses
3052    that we are now able to host
3053 */
3054 int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
3055 {
3056         struct ctdb_reloadips_handle *h;
3057         pid_t parent = getpid();
3058
3059         if (ctdb->reload_ips != NULL) {
3060                 talloc_free(ctdb->reload_ips);
3061                 ctdb->reload_ips = NULL;
3062         }
3063
3064         h = talloc(ctdb, struct ctdb_reloadips_handle);
3065         CTDB_NO_MEMORY(ctdb, h);
3066         h->ctdb     = ctdb;
3067         h->c        = NULL;
3068         h->status   = -1;
3069         
3070         if (pipe(h->fd) == -1) {
3071                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
3072                 talloc_free(h);
3073                 return -1;
3074         }
3075
3076         h->child = ctdb_fork(ctdb);
3077         if (h->child == (pid_t)-1) {
3078                 DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
3079                 close(h->fd[0]);
3080                 close(h->fd[1]);
3081                 talloc_free(h);
3082                 return -1;
3083         }
3084
3085         /* child process */
3086         if (h->child == 0) {
3087                 signed char res = 0;
3088
3089                 close(h->fd[0]);
3090                 debug_extra = talloc_asprintf(NULL, "reloadips:");
3091
3092                 prctl_set_comment("ctdb_reloadips");
3093                 if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
3094                         DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
3095                         res = -1;
3096                 } else {
3097                         res = ctdb_reloadips_child(ctdb);
3098                         if (res != 0) {
3099                                 DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
3100                         }
3101                 }
3102
3103                 sys_write(h->fd[1], &res, 1);
3104                 ctdb_wait_for_process_to_exit(parent);
3105                 _exit(0);
3106         }
3107
3108         h->c             = talloc_steal(h, c);
3109
3110         close(h->fd[1]);
3111         set_close_on_exec(h->fd[0]);
3112
3113         talloc_set_destructor(h, ctdb_reloadips_destructor);
3114
3115
3116         h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
3117                                ctdb_reloadips_child_handler, (void *)h);
3118         tevent_fd_set_auto_close(h->fde);
3119
3120         tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
3121                          ctdb_reloadips_timeout_event, h);
3122
3123         /* we reply later */
3124         *async_reply = true;
3125         return 0;
3126 }