Merge commit 'martins/master' into 1.2
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/*
  expands to timeval_current_ofs() of the takeover_timeout tunable;
  relies on a variable named 'ctdb' being in scope where it is used
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/*
  ctdb_control_send_arp() passes CTDB_ARP_INTERVAL as the first argument
  of timeval_current_ofs() when it reschedules itself, and stops once it
  has run CTDB_ARP_REPEAT times
 */
#define CTDB_ARP_INTERVAL       1
#define CTDB_ARP_REPEAT         3
35
36 struct ctdb_iface {
37         struct ctdb_iface *prev, *next;
38         const char *name;
39         bool link_up;
40         uint32_t references;
41 };
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        struct ctdb_req_control *c;     /* control request to reply to */
        struct ctdb_vnn *vnn;           /* vnn being taken over */
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        struct ctdb_req_control *c;     /* control request to reply to */
        struct ctdb_iface *old;         /* interface the address is moving away from */
        struct ctdb_vnn *vnn;           /* vnn being moved */
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
547 /*
548   take over an ip address
549  */
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551                                  struct ctdb_req_control *c,
552                                  TDB_DATA indata,
553                                  bool *async_reply)
554 {
555         int ret;
556         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557         struct ctdb_vnn *vnn;
558         bool have_ip = false;
559         bool do_updateip = false;
560         bool do_takeip = false;
561         struct ctdb_iface *best_iface = NULL;
562
563         if (pip->pnn != ctdb->pnn) {
564                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565                                  "with pnn %d, but we're node %d\n",
566                                  ctdb_addr_to_str(&pip->addr),
567                                  pip->pnn, ctdb->pnn));
568                 return -1;
569         }
570
571         /* update out vnn list */
572         vnn = find_public_ip_vnn(ctdb, &pip->addr);
573         if (vnn == NULL) {
574                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575                         ctdb_addr_to_str(&pip->addr)));
576                 return 0;
577         }
578
579         have_ip = ctdb_sys_have_ip(&pip->addr);
580         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581         if (best_iface == NULL) {
582                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583                                  "a usable interface (old %s, have_ip %d)\n",
584                                  ctdb_addr_to_str(&vnn->public_address),
585                                  vnn->public_netmask_bits,
586                                  ctdb_vnn_iface_string(vnn),
587                                  have_ip));
588                 return -1;
589         }
590
591         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
592                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
593                 have_ip = false;
594         }
595
596         if (vnn->iface == NULL && have_ip) {
597                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
598                                   "but we have no interface assigned, has someone manually configured it?"
599                                   "banning ourself\n",
600                                  ctdb_addr_to_str(&vnn->public_address)));
601                 ctdb_ban_self(ctdb);
602                 return -1;
603         }
604
605         if (vnn->pnn != ctdb->pnn && have_ip) {
606                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
607                                   "and we have it on iface[%s], but it was assigned to node %d"
608                                   "and we are node %d, banning ourself\n",
609                                  ctdb_addr_to_str(&vnn->public_address),
610                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
611                 ctdb_ban_self(ctdb);
612                 return -1;
613         }
614
615         if (vnn->iface) {
616                 if (vnn->iface->link_up) {
617                         /* only move when the rebalance gains something */
618                         if (vnn->iface->references > (best_iface->references + 1)) {
619                                 do_updateip = true;
620                         }
621                 } else if (vnn->iface != best_iface) {
622                         do_updateip = true;
623                 }
624         }
625
626         if (!have_ip) {
627                 if (do_updateip) {
628                         ctdb_vnn_unassign_iface(ctdb, vnn);
629                         do_updateip = false;
630                 }
631                 do_takeip = true;
632         }
633
634         if (do_takeip) {
635                 ret = ctdb_do_takeip(ctdb, c, vnn);
636                 if (ret != 0) {
637                         return -1;
638                 }
639         } else if (do_updateip) {
640                 ret = ctdb_do_updateip(ctdb, c, vnn);
641                 if (ret != 0) {
642                         return -1;
643                 }
644         } else {
645                 /*
646                  * The interface is up and the kernel known the ip
647                  * => do nothing
648                  */
649                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
650                         ctdb_addr_to_str(&pip->addr),
651                         vnn->public_netmask_bits,
652                         ctdb_vnn_iface_string(vnn)));
653                 return 0;
654         }
655
656         /* tell ctdb_control.c that we will be replying asynchronously */
657         *async_reply = true;
658
659         return 0;
660 }
661
662 /*
663   takeover an ip address old v4 style
664  */
665 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
666                                 struct ctdb_req_control *c,
667                                 TDB_DATA indata, 
668                                 bool *async_reply)
669 {
670         TDB_DATA data;
671         
672         data.dsize = sizeof(struct ctdb_public_ip);
673         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
674         CTDB_NO_MEMORY(ctdb, data.dptr);
675         
676         memcpy(data.dptr, indata.dptr, indata.dsize);
677         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
678 }
679
680 /*
681   kill any clients that are registered with a IP that is being released
682  */
683 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
684 {
685         struct ctdb_client_ip *ip;
686
687         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
688                 ctdb_addr_to_str(addr)));
689
690         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
691                 ctdb_sock_addr tmp_addr;
692
693                 tmp_addr = ip->addr;
694                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
695                         ip->client_id,
696                         ctdb_addr_to_str(&ip->addr)));
697
698                 if (ctdb_same_ip(&tmp_addr, addr)) {
699                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
700                                                                      ip->client_id, 
701                                                                      struct ctdb_client);
702                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
703                                 ip->client_id,
704                                 ctdb_addr_to_str(&ip->addr),
705                                 client->pid));
706
707                         if (client->pid != 0) {
708                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
709                                         (unsigned)client->pid,
710                                         ctdb_addr_to_str(addr),
711                                         ip->client_id));
712                                 kill(client->pid, SIGKILL);
713                         }
714                 }
715         }
716 }
717
718 /*
719   called when releaseip event finishes
720  */
721 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
722                                 void *private_data)
723 {
724         struct takeover_callback_state *state = 
725                 talloc_get_type(private_data, struct takeover_callback_state);
726         TDB_DATA data;
727
728         if (status == -ETIME) {
729                 ctdb_ban_self(ctdb);
730         }
731
732         /* send a message to all clients of this node telling them
733            that the cluster has been reconfigured and they should
734            release any sockets on this IP */
735         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
736         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
737         data.dsize = strlen((char *)data.dptr)+1;
738
739         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
740
741         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
742
743         /* kill clients that have registered with this IP */
744         release_kill_clients(ctdb, state->addr);
745
746         ctdb_vnn_unassign_iface(ctdb, state->vnn);
747
748         /* the control succeeded */
749         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
750         talloc_free(state);
751 }
752
753 /*
754   release an ip address
755  */
756 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
757                                 struct ctdb_req_control *c,
758                                 TDB_DATA indata, 
759                                 bool *async_reply)
760 {
761         int ret;
762         struct takeover_callback_state *state;
763         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
764         struct ctdb_vnn *vnn;
765
766         /* update our vnn list */
767         vnn = find_public_ip_vnn(ctdb, &pip->addr);
768         if (vnn == NULL) {
769                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
770                         ctdb_addr_to_str(&pip->addr)));
771                 return 0;
772         }
773         vnn->pnn = pip->pnn;
774
775         /* stop any previous arps */
776         talloc_free(vnn->takeover_ctx);
777         vnn->takeover_ctx = NULL;
778
779         if (!ctdb_sys_have_ip(&pip->addr)) {
780                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
781                         ctdb_addr_to_str(&pip->addr),
782                         vnn->public_netmask_bits, 
783                         ctdb_vnn_iface_string(vnn)));
784                 ctdb_vnn_unassign_iface(ctdb, vnn);
785                 return 0;
786         }
787
788         if (vnn->iface == NULL) {
789                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
790                                   "but we have no interface assigned, has someone manually configured it?"
791                                   "banning ourself\n",
792                                  ctdb_addr_to_str(&vnn->public_address)));
793                 ctdb_ban_self(ctdb);
794                 return -1;
795         }
796
797         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
798                 ctdb_addr_to_str(&pip->addr),
799                 vnn->public_netmask_bits, 
800                 ctdb_vnn_iface_string(vnn),
801                 pip->pnn));
802
803         state = talloc(ctdb, struct takeover_callback_state);
804         CTDB_NO_MEMORY(ctdb, state);
805
806         state->c = talloc_steal(state, c);
807         state->addr = talloc(state, ctdb_sock_addr);       
808         CTDB_NO_MEMORY(ctdb, state->addr);
809         *state->addr = pip->addr;
810         state->vnn   = vnn;
811
812         ret = ctdb_event_script_callback(ctdb, 
813                                          state, release_ip_callback, state,
814                                          false,
815                                          CTDB_EVENT_RELEASE_IP,
816                                          "%s %s %u",
817                                          ctdb_vnn_iface_string(vnn),
818                                          ctdb_addr_to_str(&pip->addr),
819                                          vnn->public_netmask_bits);
820         if (ret != 0) {
821                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
822                         ctdb_addr_to_str(&pip->addr),
823                         ctdb_vnn_iface_string(vnn)));
824                 talloc_free(state);
825                 return -1;
826         }
827
828         /* tell the control that we will be reply asynchronously */
829         *async_reply = true;
830         return 0;
831 }
832
833 /*
834   release an ip address old v4 style
835  */
836 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
837                                 struct ctdb_req_control *c,
838                                 TDB_DATA indata, 
839                                 bool *async_reply)
840 {
841         TDB_DATA data;
842         
843         data.dsize = sizeof(struct ctdb_public_ip);
844         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
845         CTDB_NO_MEMORY(ctdb, data.dptr);
846         
847         memcpy(data.dptr, indata.dptr, indata.dsize);
848         return ctdb_control_release_ip(ctdb, c, data, async_reply);
849 }
850
851
852 static int ctdb_add_public_address(struct ctdb_context *ctdb,
853                                    ctdb_sock_addr *addr,
854                                    unsigned mask, const char *ifaces)
855 {
856         struct ctdb_vnn      *vnn;
857         uint32_t num = 0;
858         char *tmp;
859         const char *iface;
860         int i;
861         int ret;
862
863         /* Verify that we dont have an entry for this ip yet */
864         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
865                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
866                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
867                                 ctdb_addr_to_str(addr)));
868                         return -1;
869                 }               
870         }
871
872         /* create a new vnn structure for this ip address */
873         vnn = talloc_zero(ctdb, struct ctdb_vnn);
874         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
875         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
876         tmp = talloc_strdup(vnn, ifaces);
877         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
878         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
879                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
880                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
881                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
882                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
883                 num++;
884         }
885         talloc_free(tmp);
886         vnn->ifaces[num] = NULL;
887         vnn->public_address      = *addr;
888         vnn->public_netmask_bits = mask;
889         vnn->pnn                 = -1;
890
891         for (i=0; vnn->ifaces[i]; i++) {
892                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
893                 if (ret != 0) {
894                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
895                                            "for public_address[%s]\n",
896                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
897                         talloc_free(vnn);
898                         return -1;
899                 }
900         }
901
902         DLIST_ADD(ctdb->vnn, vnn);
903
904         return 0;
905 }
906
907 /*
908   setup the event script directory
909 */
910 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
911 {
912         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
913         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
914         return 0;
915 }
916
917 /*
918   setup the public address lists from a file
919 */
920 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
921 {
922         char **lines;
923         int nlines;
924         int i;
925
926         lines = file_lines_load(alist, &nlines, ctdb);
927         if (lines == NULL) {
928                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
929                 return -1;
930         }
931         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
932                 nlines--;
933         }
934
935         for (i=0;i<nlines;i++) {
936                 unsigned mask;
937                 ctdb_sock_addr addr;
938                 const char *addrstr;
939                 const char *ifaces;
940                 char *tok, *line;
941
942                 line = lines[i];
943                 while ((*line == ' ') || (*line == '\t')) {
944                         line++;
945                 }
946                 if (*line == '#') {
947                         continue;
948                 }
949                 if (strcmp(line, "") == 0) {
950                         continue;
951                 }
952                 tok = strtok(line, " \t");
953                 addrstr = tok;
954                 tok = strtok(NULL, " \t");
955                 if (tok == NULL) {
956                         if (NULL == ctdb->default_public_interface) {
957                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
958                                          i+1));
959                                 talloc_free(lines);
960                                 return -1;
961                         }
962                         ifaces = ctdb->default_public_interface;
963                 } else {
964                         ifaces = tok;
965                 }
966
967                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
968                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
969                         talloc_free(lines);
970                         return -1;
971                 }
972                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
973                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
974                         talloc_free(lines);
975                         return -1;
976                 }
977         }
978
979         talloc_free(lines);
980         return 0;
981 }
982
983 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
984                               const char *iface,
985                               const char *ip)
986 {
987         struct ctdb_vnn *svnn;
988         bool ok;
989         int ret;
990
991         svnn = talloc_zero(ctdb, struct ctdb_vnn);
992         CTDB_NO_MEMORY(ctdb, svnn);
993
994         svnn->ifaces = talloc_array(svnn, const char *, 2);
995         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
996         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
997         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
998         svnn->ifaces[1] = NULL;
999
1000         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1001         if (!ok) {
1002                 talloc_free(svnn);
1003                 return -1;
1004         }
1005
1006         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1007         if (ret != 0) {
1008                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1009                                    "for single_ip[%s]\n",
1010                                    svnn->ifaces[0],
1011                                    ctdb_addr_to_str(&svnn->public_address)));
1012                 talloc_free(svnn);
1013                 return -1;
1014         }
1015
1016         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1017         if (ret != 0) {
1018                 talloc_free(svnn);
1019                 return -1;
1020         }
1021
1022         ctdb->single_ip_vnn = svnn;
1023         return 0;
1024 }
1025
1026 struct ctdb_public_ip_list {
1027         struct ctdb_public_ip_list *next;
1028         uint32_t pnn;
1029         ctdb_sock_addr addr;
1030 };
1031
1032
1033 /* Given a physical node, return the number of
1034    public addresses that is currently assigned to this node.
1035 */
1036 static int node_ip_coverage(struct ctdb_context *ctdb, 
1037         int32_t pnn,
1038         struct ctdb_public_ip_list *ips)
1039 {
1040         int num=0;
1041
1042         for (;ips;ips=ips->next) {
1043                 if (ips->pnn == pnn) {
1044                         num++;
1045                 }
1046         }
1047         return num;
1048 }
1049
1050
1051 /* Check if this is a public ip known to the node, i.e. can that
1052    node takeover this ip ?
1053 */
1054 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1055                 struct ctdb_public_ip_list *ip)
1056 {
1057         struct ctdb_all_public_ips *public_ips;
1058         int i;
1059
1060         public_ips = ctdb->nodes[pnn]->available_public_ips;
1061
1062         if (public_ips == NULL) {
1063                 return -1;
1064         }
1065
1066         for (i=0;i<public_ips->num;i++) {
1067                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1068                         /* yes, this node can serve this public ip */
1069                         return 0;
1070                 }
1071         }
1072
1073         return -1;
1074 }
1075
1076
1077 /* search the node lists list for a node to takeover this ip.
1078    pick the node that currently are serving the least number of ips
1079    so that the ips get spread out evenly.
1080 */
1081 static int find_takeover_node(struct ctdb_context *ctdb, 
1082                 struct ctdb_node_map *nodemap, uint32_t mask, 
1083                 struct ctdb_public_ip_list *ip,
1084                 struct ctdb_public_ip_list *all_ips)
1085 {
1086         int pnn, min=0, num;
1087         int i;
1088
1089         pnn    = -1;
1090         for (i=0;i<nodemap->num;i++) {
1091                 if (nodemap->nodes[i].flags & mask) {
1092                         /* This node is not healty and can not be used to serve
1093                            a public address 
1094                         */
1095                         continue;
1096                 }
1097
1098                 /* verify that this node can serve this ip */
1099                 if (can_node_serve_ip(ctdb, i, ip)) {
1100                         /* no it couldnt   so skip to the next node */
1101                         continue;
1102                 }
1103
1104                 num = node_ip_coverage(ctdb, i, all_ips);
1105                 /* was this the first node we checked ? */
1106                 if (pnn == -1) {
1107                         pnn = i;
1108                         min  = num;
1109                 } else {
1110                         if (num < min) {
1111                                 pnn = i;
1112                                 min  = num;
1113                         }
1114                 }
1115         }       
1116         if (pnn == -1) {
1117                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1118                         ctdb_addr_to_str(&ip->addr)));
1119
1120                 return -1;
1121         }
1122
1123         ip->pnn = pnn;
1124         return 0;
1125 }
1126
1127 #define IP_KEYLEN       4
1128 static uint32_t *ip_key(ctdb_sock_addr *ip)
1129 {
1130         static uint32_t key[IP_KEYLEN];
1131
1132         bzero(key, sizeof(key));
1133
1134         switch (ip->sa.sa_family) {
1135         case AF_INET:
1136                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1137                 break;
1138         case AF_INET6:
1139                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1140                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1141                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1142                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1143                 break;
1144         default:
1145                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1146                 return key;
1147         }
1148
1149         return key;
1150 }
1151
/* insert callback handed to trbt_insertarray32_callback() when the
   merged ip list is built: it hands back the new entry 'parm' and
   ignores 'data'
*/
static void *add_ip_callback(void *parm, void *data)
{
        void *entry = parm;

        return entry;
}
1156
1157 void getips_count_callback(void *param, void *data)
1158 {
1159         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1160         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1161
1162         new_ip->next = *ip_list;
1163         *ip_list     = new_ip;
1164 }
1165
1166 static struct ctdb_public_ip_list *
1167 create_merged_ip_list(struct ctdb_context *ctdb)
1168 {
1169         int i, j;
1170         struct ctdb_public_ip_list *ip_list;
1171         struct ctdb_all_public_ips *public_ips;
1172
1173         if (ctdb->ip_tree != NULL) {
1174                 talloc_free(ctdb->ip_tree);
1175                 ctdb->ip_tree = NULL;
1176         }
1177         ctdb->ip_tree = trbt_create(ctdb, 0);
1178
1179         for (i=0;i<ctdb->num_nodes;i++) {
1180                 public_ips = ctdb->nodes[i]->known_public_ips;
1181
1182                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1183                         continue;
1184                 }
1185
1186                 /* there were no public ips for this node */
1187                 if (public_ips == NULL) {
1188                         continue;
1189                 }               
1190
1191                 for (j=0;j<public_ips->num;j++) {
1192                         struct ctdb_public_ip_list *tmp_ip; 
1193
1194                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1195                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1196                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1197                         tmp_ip->addr = public_ips->ips[j].addr;
1198                         tmp_ip->next = NULL;
1199
1200                         trbt_insertarray32_callback(ctdb->ip_tree,
1201                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1202                                 add_ip_callback,
1203                                 tmp_ip);
1204                 }
1205         }
1206
1207         ip_list = NULL;
1208         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1209
1210         return ip_list;
1211 }
1212
1213 /*
1214   make any IP alias changes for public addresses that are necessary 
1215  */
1216 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1217 {
1218         int i, num_healthy, retries, num_ips;
1219         struct ctdb_public_ip ip;
1220         struct ctdb_public_ipv4 ipv4;
1221         uint32_t mask, *nodes;
1222         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1223         int maxnode, maxnum=0, minnode, minnum=0, num;
1224         TDB_DATA data;
1225         struct timeval timeout;
1226         struct client_async_data *async_data;
1227         struct ctdb_client_control_state *state;
1228         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1229
1230
1231         ZERO_STRUCT(ip);
1232
1233         /* Count how many completely healthy nodes we have */
1234         num_healthy = 0;
1235         for (i=0;i<nodemap->num;i++) {
1236                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1237                         num_healthy++;
1238                 }
1239         }
1240
1241         if (num_healthy > 0) {
1242                 /* We have healthy nodes, so only consider them for 
1243                    serving public addresses
1244                 */
1245                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1246         } else {
1247                 /* We didnt have any completely healthy nodes so
1248                    use "disabled" nodes as a fallback
1249                 */
1250                 mask = NODE_FLAGS_INACTIVE;
1251         }
1252
1253         /* since nodes only know about those public addresses that
1254            can be served by that particular node, no single node has
1255            a full list of all public addresses that exist in the cluster.
1256            Walk over all node structures and create a merged list of
1257            all public addresses that exist in the cluster.
1258
1259            keep the tree of ips around as ctdb->ip_tree
1260         */
1261         all_ips = create_merged_ip_list(ctdb);
1262
1263         /* If we want deterministic ip allocations, i.e. that the ip addresses
1264            will always be allocated the same way for a specific set of
1265            available/unavailable nodes.
1266         */
1267         if (1 == ctdb->tunable.deterministic_public_ips) {              
1268                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1269                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1270                         tmp_ip->pnn = i%nodemap->num;
1271                 }
1272         }
1273
1274
1275         /* mark all public addresses with a masked node as being served by
1276            node -1
1277         */
1278         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1279                 if (tmp_ip->pnn == -1) {
1280                         continue;
1281                 }
1282                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1283                         tmp_ip->pnn = -1;
1284                 }
1285         }
1286
1287         /* verify that the assigned nodes can serve that public ip
1288            and set it to -1 if not
1289         */
1290         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1291                 if (tmp_ip->pnn == -1) {
1292                         continue;
1293                 }
1294                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1295                         /* this node can not serve this ip. */
1296                         tmp_ip->pnn = -1;
1297                 }
1298         }
1299
1300
1301         /* now we must redistribute all public addresses with takeover node
1302            -1 among the nodes available
1303         */
1304         retries = 0;
1305 try_again:
1306         /* loop over all ip's and find a physical node to cover for 
1307            each unassigned ip.
1308         */
1309         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1310                 if (tmp_ip->pnn == -1) {
1311                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1312                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1313                                         ctdb_addr_to_str(&tmp_ip->addr)));
1314                         }
1315                 }
1316         }
1317
1318         /* If we dont want ips to fail back after a node becomes healthy
1319            again, we wont even try to reallocat the ip addresses so that
1320            they are evenly spread out.
1321            This can NOT be used at the same time as DeterministicIPs !
1322         */
1323         if (1 == ctdb->tunable.no_ip_failback) {
1324                 if (1 == ctdb->tunable.deterministic_public_ips) {
1325                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1326                 }
1327                 goto finished;
1328         }
1329
1330
1331         /* now, try to make sure the ip adresses are evenly distributed
1332            across the node.
1333            for each ip address, loop over all nodes that can serve this
1334            ip and make sure that the difference between the node
1335            serving the most and the node serving the least ip's are not greater
1336            than 1.
1337         */
1338         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1339                 if (tmp_ip->pnn == -1) {
1340                         continue;
1341                 }
1342
1343                 /* Get the highest and lowest number of ips's served by any 
1344                    valid node which can serve this ip.
1345                 */
1346                 maxnode = -1;
1347                 minnode = -1;
1348                 for (i=0;i<nodemap->num;i++) {
1349                         if (nodemap->nodes[i].flags & mask) {
1350                                 continue;
1351                         }
1352
1353                         /* only check nodes that can actually serve this ip */
1354                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1355                                 /* no it couldnt   so skip to the next node */
1356                                 continue;
1357                         }
1358
1359                         num = node_ip_coverage(ctdb, i, all_ips);
1360                         if (maxnode == -1) {
1361                                 maxnode = i;
1362                                 maxnum  = num;
1363                         } else {
1364                                 if (num > maxnum) {
1365                                         maxnode = i;
1366                                         maxnum  = num;
1367                                 }
1368                         }
1369                         if (minnode == -1) {
1370                                 minnode = i;
1371                                 minnum  = num;
1372                         } else {
1373                                 if (num < minnum) {
1374                                         minnode = i;
1375                                         minnum  = num;
1376                                 }
1377                         }
1378                 }
1379                 if (maxnode == -1) {
1380                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1381                                 ctdb_addr_to_str(&tmp_ip->addr)));
1382
1383                         continue;
1384                 }
1385
1386                 /* If we want deterministic IPs then dont try to reallocate 
1387                    them to spread out the load.
1388                 */
1389                 if (1 == ctdb->tunable.deterministic_public_ips) {
1390                         continue;
1391                 }
1392
1393                 /* if the spread between the smallest and largest coverage by
1394                    a node is >=2 we steal one of the ips from the node with
1395                    most coverage to even things out a bit.
1396                    try to do this at most 5 times  since we dont want to spend
1397                    too much time balancing the ip coverage.
1398                 */
1399                 if ( (maxnum > minnum+1)
1400                   && (retries < 5) ){
1401                         struct ctdb_public_ip_list *tmp;
1402
1403                         /* mark one of maxnode's vnn's as unassigned and try
1404                            again
1405                         */
1406                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1407                                 if (tmp->pnn == maxnode) {
1408                                         tmp->pnn = -1;
1409                                         retries++;
1410                                         goto try_again;
1411                                 }
1412                         }
1413                 }
1414         }
1415
1416
1417         /* finished distributing the public addresses, now just send the 
1418            info out to the nodes
1419         */
1420 finished:
1421
1422         /* at this point ->pnn is the node which will own each IP
1423            or -1 if there is no node that can cover this ip
1424         */
1425
1426         /* now tell all nodes to delete any alias that they should not
1427            have.  This will be a NOOP on nodes that don't currently
1428            hold the given alias */
1429         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1430         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1431
1432         for (i=0;i<nodemap->num;i++) {
1433                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1434                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1435                         continue;
1436                 }
1437
1438                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1439                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1440                                 /* This node should be serving this
1441                                    vnn so dont tell it to release the ip
1442                                 */
1443                                 continue;
1444                         }
1445                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1446                                 ipv4.pnn = tmp_ip->pnn;
1447                                 ipv4.sin = tmp_ip->addr.ip;
1448
1449                                 timeout = TAKEOVER_TIMEOUT();
1450                                 data.dsize = sizeof(ipv4);
1451                                 data.dptr  = (uint8_t *)&ipv4;
1452                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1453                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1454                                                 data, async_data,
1455                                                 &timeout, NULL);
1456                         } else {
1457                                 ip.pnn  = tmp_ip->pnn;
1458                                 ip.addr = tmp_ip->addr;
1459
1460                                 timeout = TAKEOVER_TIMEOUT();
1461                                 data.dsize = sizeof(ip);
1462                                 data.dptr  = (uint8_t *)&ip;
1463                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1464                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1465                                                 data, async_data,
1466                                                 &timeout, NULL);
1467                         }
1468
1469                         if (state == NULL) {
1470                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1471                                 talloc_free(tmp_ctx);
1472                                 return -1;
1473                         }
1474                 
1475                         ctdb_client_async_add(async_data, state);
1476                 }
1477         }
1478         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1479                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1480                 talloc_free(tmp_ctx);
1481                 return -1;
1482         }
1483         talloc_free(async_data);
1484
1485
1486         /* tell all nodes to get their own IPs */
1487         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1490                 if (tmp_ip->pnn == -1) {
1491                         /* this IP won't be taken over */
1492                         continue;
1493                 }
1494
1495                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1496                         ipv4.pnn = tmp_ip->pnn;
1497                         ipv4.sin = tmp_ip->addr.ip;
1498
1499                         timeout = TAKEOVER_TIMEOUT();
1500                         data.dsize = sizeof(ipv4);
1501                         data.dptr  = (uint8_t *)&ipv4;
1502                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1503                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1504                                         data, async_data,
1505                                         &timeout, NULL);
1506                 } else {
1507                         ip.pnn  = tmp_ip->pnn;
1508                         ip.addr = tmp_ip->addr;
1509
1510                         timeout = TAKEOVER_TIMEOUT();
1511                         data.dsize = sizeof(ip);
1512                         data.dptr  = (uint8_t *)&ip;
1513                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1514                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1515                                         data, async_data,
1516                                         &timeout, NULL);
1517                 }
1518                 if (state == NULL) {
1519                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1520                         talloc_free(tmp_ctx);
1521                         return -1;
1522                 }
1523                 
1524                 ctdb_client_async_add(async_data, state);
1525         }
1526         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1527                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1528                 talloc_free(tmp_ctx);
1529                 return -1;
1530         }
1531
1532         /* tell all nodes to update natwg */
1533         /* send the flags update natgw on all connected nodes */
1534         data.dptr  = discard_const("ipreallocated");
1535         data.dsize = strlen((char *)data.dptr) + 1; 
1536         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1537         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1538                                       nodes, 0, TAKEOVER_TIMEOUT(),
1539                                       false, data,
1540                                       NULL, NULL,
1541                                       NULL) != 0) {
1542                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1543         }
1544
1545         talloc_free(tmp_ctx);
1546         return 0;
1547 }
1548
1549
1550 /*
1551   destroy a ctdb_client_ip structure
1552  */
1553 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1554 {
1555         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1556                 ctdb_addr_to_str(&ip->addr),
1557                 ntohs(ip->addr.ip.sin_port),
1558                 ip->client_id));
1559
1560         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1561         return 0;
1562 }
1563
1564 /*
1565   called by a client to inform us of a TCP connection that it is managing
1566   that should tickled with an ACK when IP takeover is done
1567   we handle both the old ipv4 style of packets as well as the new ipv4/6
1568   pdus.
1569  */
1570 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1571                                 TDB_DATA indata)
1572 {
1573         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1574         struct ctdb_control_tcp *old_addr = NULL;
1575         struct ctdb_control_tcp_addr new_addr;
1576         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1577         struct ctdb_tcp_list *tcp;
1578         struct ctdb_tcp_connection t;
1579         int ret;
1580         TDB_DATA data;
1581         struct ctdb_client_ip *ip;
1582         struct ctdb_vnn *vnn;
1583         ctdb_sock_addr addr;
1584
1585         switch (indata.dsize) {
1586         case sizeof(struct ctdb_control_tcp):
1587                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1588                 ZERO_STRUCT(new_addr);
1589                 tcp_sock = &new_addr;
1590                 tcp_sock->src.ip  = old_addr->src;
1591                 tcp_sock->dest.ip = old_addr->dest;
1592                 break;
1593         case sizeof(struct ctdb_control_tcp_addr):
1594                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1595                 break;
1596         default:
1597                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1598                                  "to ctdb_control_tcp_client. size was %d but "
1599                                  "only allowed sizes are %lu and %lu\n",
1600                                  (int)indata.dsize,
1601                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1602                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1603                 return -1;
1604         }
1605
1606         addr = tcp_sock->src;
1607         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1608         addr = tcp_sock->dest;
1609         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1610
1611         ZERO_STRUCT(addr);
1612         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1613         vnn = find_public_ip_vnn(ctdb, &addr);
1614         if (vnn == NULL) {
1615                 switch (addr.sa.sa_family) {
1616                 case AF_INET:
1617                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1618                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1619                                         ctdb_addr_to_str(&addr)));
1620                         }
1621                         break;
1622                 case AF_INET6:
1623                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1624                                 ctdb_addr_to_str(&addr)));
1625                         break;
1626                 default:
1627                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1628                 }
1629
1630                 return 0;
1631         }
1632
1633         if (vnn->pnn != ctdb->pnn) {
1634                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1635                         ctdb_addr_to_str(&addr),
1636                         client_id, client->pid));
1637                 /* failing this call will tell smbd to die */
1638                 return -1;
1639         }
1640
1641         ip = talloc(client, struct ctdb_client_ip);
1642         CTDB_NO_MEMORY(ctdb, ip);
1643
1644         ip->ctdb      = ctdb;
1645         ip->addr      = addr;
1646         ip->client_id = client_id;
1647         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1648         DLIST_ADD(ctdb->client_ip_list, ip);
1649
1650         tcp = talloc(client, struct ctdb_tcp_list);
1651         CTDB_NO_MEMORY(ctdb, tcp);
1652
1653         tcp->connection.src_addr = tcp_sock->src;
1654         tcp->connection.dst_addr = tcp_sock->dest;
1655
1656         DLIST_ADD(client->tcp_list, tcp);
1657
1658         t.src_addr = tcp_sock->src;
1659         t.dst_addr = tcp_sock->dest;
1660
1661         data.dptr = (uint8_t *)&t;
1662         data.dsize = sizeof(t);
1663
1664         switch (addr.sa.sa_family) {
1665         case AF_INET:
1666                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1667                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1668                         ctdb_addr_to_str(&tcp_sock->src),
1669                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1670                 break;
1671         case AF_INET6:
1672                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1673                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1674                         ctdb_addr_to_str(&tcp_sock->src),
1675                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1676                 break;
1677         default:
1678                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1679         }
1680
1681
1682         /* tell all nodes about this tcp connection */
1683         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1684                                        CTDB_CONTROL_TCP_ADD,
1685                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1686         if (ret != 0) {
1687                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1688                 return -1;
1689         }
1690
1691         return 0;
1692 }
1693
1694 /*
1695   find a tcp address on a list
1696  */
1697 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1698                                            struct ctdb_tcp_connection *tcp)
1699 {
1700         int i;
1701
1702         if (array == NULL) {
1703                 return NULL;
1704         }
1705
1706         for (i=0;i<array->num;i++) {
1707                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1708                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1709                         return &array->connections[i];
1710                 }
1711         }
1712         return NULL;
1713 }
1714
1715
1716
1717 /*
1718   called by a daemon to inform us of a TCP connection that one of its
1719   clients managing that should tickled with an ACK when IP takeover is
1720   done
1721  */
1722 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1723 {
1724         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1725         struct ctdb_tcp_array *tcparray;
1726         struct ctdb_tcp_connection tcp;
1727         struct ctdb_vnn *vnn;
1728
1729         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1730         if (vnn == NULL) {
1731                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1732                         ctdb_addr_to_str(&p->dst_addr)));
1733
1734                 return -1;
1735         }
1736
1737
1738         tcparray = vnn->tcp_array;
1739
1740         /* If this is the first tickle */
1741         if (tcparray == NULL) {
1742                 tcparray = talloc_size(ctdb->nodes, 
1743                         offsetof(struct ctdb_tcp_array, connections) +
1744                         sizeof(struct ctdb_tcp_connection) * 1);
1745                 CTDB_NO_MEMORY(ctdb, tcparray);
1746                 vnn->tcp_array = tcparray;
1747
1748                 tcparray->num = 0;
1749                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1750                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1751
1752                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1753                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1754                 tcparray->num++;
1755
1756                 if (tcp_update_needed) {
1757                         vnn->tcp_update_needed = true;
1758                 }
1759                 return 0;
1760         }
1761
1762
1763         /* Do we already have this tickle ?*/
1764         tcp.src_addr = p->src_addr;
1765         tcp.dst_addr = p->dst_addr;
1766         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1767                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1768                         ctdb_addr_to_str(&tcp.dst_addr),
1769                         ntohs(tcp.dst_addr.ip.sin_port),
1770                         vnn->pnn));
1771                 return 0;
1772         }
1773
1774         /* A new tickle, we must add it to the array */
1775         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1776                                         struct ctdb_tcp_connection,
1777                                         tcparray->num+1);
1778         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1779
1780         vnn->tcp_array = tcparray;
1781         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1782         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1783         tcparray->num++;
1784                                 
1785         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1786                 ctdb_addr_to_str(&tcp.dst_addr),
1787                 ntohs(tcp.dst_addr.ip.sin_port),
1788                 vnn->pnn));
1789
1790         if (tcp_update_needed) {
1791                 vnn->tcp_update_needed = true;
1792         }
1793
1794         return 0;
1795 }
1796
1797
1798 /*
1799   called by a daemon to inform us of a TCP connection that one of its
1800   clients managing that should tickled with an ACK when IP takeover is
1801   done
1802  */
1803 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1804 {
1805         struct ctdb_tcp_connection *tcpp;
1806         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1807
1808         if (vnn == NULL) {
1809                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1810                         ctdb_addr_to_str(&conn->dst_addr)));
1811                 return;
1812         }
1813
1814         /* if the array is empty we cant remove it
1815            and we dont need to do anything
1816          */
1817         if (vnn->tcp_array == NULL) {
1818                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1819                         ctdb_addr_to_str(&conn->dst_addr),
1820                         ntohs(conn->dst_addr.ip.sin_port)));
1821                 return;
1822         }
1823
1824
1825         /* See if we know this connection
1826            if we dont know this connection  then we dont need to do anything
1827          */
1828         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1829         if (tcpp == NULL) {
1830                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1831                         ctdb_addr_to_str(&conn->dst_addr),
1832                         ntohs(conn->dst_addr.ip.sin_port)));
1833                 return;
1834         }
1835
1836
1837         /* We need to remove this entry from the array.
1838            Instead of allocating a new array and copying data to it
1839            we cheat and just copy the last entry in the existing array
1840            to the entry that is to be removed and just shring the 
1841            ->num field
1842          */
1843         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1844         vnn->tcp_array->num--;
1845
1846         /* If we deleted the last entry we also need to remove the entire array
1847          */
1848         if (vnn->tcp_array->num == 0) {
1849                 talloc_free(vnn->tcp_array);
1850                 vnn->tcp_array = NULL;
1851         }               
1852
1853         vnn->tcp_update_needed = true;
1854
1855         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1856                 ctdb_addr_to_str(&conn->src_addr),
1857                 ntohs(conn->src_addr.ip.sin_port)));
1858 }
1859
1860
1861 /*
1862   called by a daemon to inform us of a TCP connection that one of its
1863   clients used are no longer needed in the tickle database
1864  */
1865 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1866 {
1867         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1868
1869         ctdb_remove_tcp_connection(ctdb, conn);
1870
1871         return 0;
1872 }
1873
1874
/*
  called when a daemon restarts.
  the intent is to send all tickles for all public addresses we are
  serving to the new node immediately; that is not implemented, so
  this currently does nothing and always returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        return 0;
}
1884
1885
1886 /*
1887   called when a client structure goes away - hook to remove
1888   elements from the tcp_list in all daemons
1889  */
1890 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1891 {
1892         while (client->tcp_list) {
1893                 struct ctdb_tcp_list *tcp = client->tcp_list;
1894                 DLIST_REMOVE(client->tcp_list, tcp);
1895                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1896         }
1897 }
1898
1899
1900 /*
1901   release all IPs on shutdown
1902  */
1903 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1904 {
1905         struct ctdb_vnn *vnn;
1906
1907         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1908                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1909                         ctdb_vnn_unassign_iface(ctdb, vnn);
1910                         continue;
1911                 }
1912                 if (!vnn->iface) {
1913                         continue;
1914                 }
1915                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1916                                   ctdb_vnn_iface_string(vnn),
1917                                   ctdb_addr_to_str(&vnn->public_address),
1918                                   vnn->public_netmask_bits);
1919                 release_kill_clients(ctdb, &vnn->public_address);
1920                 ctdb_vnn_unassign_iface(ctdb, vnn);
1921         }
1922 }
1923
1924
1925 /*
1926   get list of public IPs
1927  */
1928 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1929                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1930 {
1931         int i, num, len;
1932         struct ctdb_all_public_ips *ips;
1933         struct ctdb_vnn *vnn;
1934         bool only_available = false;
1935
1936         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1937                 only_available = true;
1938         }
1939
1940         /* count how many public ip structures we have */
1941         num = 0;
1942         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1943                 num++;
1944         }
1945
1946         len = offsetof(struct ctdb_all_public_ips, ips) + 
1947                 num*sizeof(struct ctdb_public_ip);
1948         ips = talloc_zero_size(outdata, len);
1949         CTDB_NO_MEMORY(ctdb, ips);
1950
1951         i = 0;
1952         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1953                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1954                         continue;
1955                 }
1956                 ips->ips[i].pnn  = vnn->pnn;
1957                 ips->ips[i].addr = vnn->public_address;
1958                 i++;
1959         }
1960         ips->num = i;
1961         len = offsetof(struct ctdb_all_public_ips, ips) +
1962                 i*sizeof(struct ctdb_public_ip);
1963
1964         outdata->dsize = len;
1965         outdata->dptr  = (uint8_t *)ips;
1966
1967         return 0;
1968 }
1969
1970
1971 /*
1972   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1973  */
1974 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1975                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1976 {
1977         int i, num, len;
1978         struct ctdb_all_public_ipsv4 *ips;
1979         struct ctdb_vnn *vnn;
1980
1981         /* count how many public ip structures we have */
1982         num = 0;
1983         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1984                 if (vnn->public_address.sa.sa_family != AF_INET) {
1985                         continue;
1986                 }
1987                 num++;
1988         }
1989
1990         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1991                 num*sizeof(struct ctdb_public_ipv4);
1992         ips = talloc_zero_size(outdata, len);
1993         CTDB_NO_MEMORY(ctdb, ips);
1994
1995         outdata->dsize = len;
1996         outdata->dptr  = (uint8_t *)ips;
1997
1998         ips->num = num;
1999         i = 0;
2000         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2001                 if (vnn->public_address.sa.sa_family != AF_INET) {
2002                         continue;
2003                 }
2004                 ips->ips[i].pnn = vnn->pnn;
2005                 ips->ips[i].sin = vnn->public_address.ip;
2006                 i++;
2007         }
2008
2009         return 0;
2010 }
2011
2012 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2013                                         struct ctdb_req_control *c,
2014                                         TDB_DATA indata,
2015                                         TDB_DATA *outdata)
2016 {
2017         int i, num, len;
2018         ctdb_sock_addr *addr;
2019         struct ctdb_control_public_ip_info *info;
2020         struct ctdb_vnn *vnn;
2021
2022         addr = (ctdb_sock_addr *)indata.dptr;
2023
2024         vnn = find_public_ip_vnn(ctdb, addr);
2025         if (vnn == NULL) {
2026                 /* if it is not a public ip   it could be our 'single ip' */
2027                 if (ctdb->single_ip_vnn) {
2028                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2029                                 vnn = ctdb->single_ip_vnn;
2030                         }
2031                 }
2032         }
2033         if (vnn == NULL) {
2034                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2035                                  "'%s'not a public address\n",
2036                                  ctdb_addr_to_str(addr)));
2037                 return -1;
2038         }
2039
2040         /* count how many public ip structures we have */
2041         num = 0;
2042         for (;vnn->ifaces[num];) {
2043                 num++;
2044         }
2045
2046         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2047                 num*sizeof(struct ctdb_control_iface_info);
2048         info = talloc_zero_size(outdata, len);
2049         CTDB_NO_MEMORY(ctdb, info);
2050
2051         info->ip.addr = vnn->public_address;
2052         info->ip.pnn = vnn->pnn;
2053         info->active_idx = 0xFFFFFFFF;
2054
2055         for (i=0; vnn->ifaces[i]; i++) {
2056                 struct ctdb_iface *cur;
2057
2058                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2059                 if (cur == NULL) {
2060                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2061                                            vnn->ifaces[i]));
2062                         return -1;
2063                 }
2064                 if (vnn->iface == cur) {
2065                         info->active_idx = i;
2066                 }
2067                 strcpy(info->ifaces[i].name, cur->name);
2068                 info->ifaces[i].link_state = cur->link_up;
2069                 info->ifaces[i].references = cur->references;
2070         }
2071         info->num = i;
2072         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2073                 i*sizeof(struct ctdb_control_iface_info);
2074
2075         outdata->dsize = len;
2076         outdata->dptr  = (uint8_t *)info;
2077
2078         return 0;
2079 }
2080
2081 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2082                                 struct ctdb_req_control *c,
2083                                 TDB_DATA *outdata)
2084 {
2085         int i, num, len;
2086         struct ctdb_control_get_ifaces *ifaces;
2087         struct ctdb_iface *cur;
2088
2089         /* count how many public ip structures we have */
2090         num = 0;
2091         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2092                 num++;
2093         }
2094
2095         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2096                 num*sizeof(struct ctdb_control_iface_info);
2097         ifaces = talloc_zero_size(outdata, len);
2098         CTDB_NO_MEMORY(ctdb, ifaces);
2099
2100         i = 0;
2101         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2102                 strcpy(ifaces->ifaces[i].name, cur->name);
2103                 ifaces->ifaces[i].link_state = cur->link_up;
2104                 ifaces->ifaces[i].references = cur->references;
2105                 i++;
2106         }
2107         ifaces->num = i;
2108         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2109                 i*sizeof(struct ctdb_control_iface_info);
2110
2111         outdata->dsize = len;
2112         outdata->dptr  = (uint8_t *)ifaces;
2113
2114         return 0;
2115 }
2116
2117 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2118                                     struct ctdb_req_control *c,
2119                                     TDB_DATA indata)
2120 {
2121         struct ctdb_control_iface_info *info;
2122         struct ctdb_iface *iface;
2123         bool link_up = false;
2124
2125         info = (struct ctdb_control_iface_info *)indata.dptr;
2126
2127         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2128                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2129                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2130                                   len, len, info->name));
2131                 return -1;
2132         }
2133
2134         switch (info->link_state) {
2135         case 0:
2136                 link_up = false;
2137                 break;
2138         case 1:
2139                 link_up = true;
2140                 break;
2141         default:
2142                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2143                                   (unsigned int)info->link_state));
2144                 return -1;
2145         }
2146
2147         if (info->references != 0) {
2148                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2149                                   (unsigned int)info->references));
2150                 return -1;
2151         }
2152
2153         iface = ctdb_find_iface(ctdb, info->name);
2154         if (iface == NULL) {
2155                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2156                                   info->name));
2157                 return -1;
2158         }
2159
2160         if (link_up == iface->link_up) {
2161                 return 0;
2162         }
2163
2164         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2165               ("iface[%s] has changed it's link status %s => %s\n",
2166                iface->name,
2167                iface->link_up?"up":"down",
2168                link_up?"up":"down"));
2169
2170         iface->link_up = link_up;
2171         return 0;
2172 }
2173
2174
2175 /* 
2176    structure containing the listening socket and the list of tcp connections
2177    that the ctdb daemon is to kill
2178 */
2179 struct ctdb_kill_tcp {
2180         struct ctdb_vnn *vnn;
2181         struct ctdb_context *ctdb;
2182         int capture_fd;
2183         struct fd_event *fde;
2184         trbt_tree_t *connections;
2185         void *private_data;
2186 };
2187
2188 /*
2189   a tcp connection that is to be killed
2190  */
2191 struct ctdb_killtcp_con {
2192         ctdb_sock_addr src_addr;
2193         ctdb_sock_addr dst_addr;
2194         int count;
2195         struct ctdb_kill_tcp *killtcp;
2196 };
2197
2198 /* this function is used to create a key to represent this socketpair
2199    in the killtcp tree.
2200    this key is used to insert and lookup matching socketpairs that are
2201    to be tickled and RST
2202 */
2203 #define KILLTCP_KEYLEN  10
2204 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2205 {
2206         static uint32_t key[KILLTCP_KEYLEN];
2207
2208         bzero(key, sizeof(key));
2209
2210         if (src->sa.sa_family != dst->sa.sa_family) {
2211                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2212                 return key;
2213         }
2214         
2215         switch (src->sa.sa_family) {
2216         case AF_INET:
2217                 key[0]  = dst->ip.sin_addr.s_addr;
2218                 key[1]  = src->ip.sin_addr.s_addr;
2219                 key[2]  = dst->ip.sin_port;
2220                 key[3]  = src->ip.sin_port;
2221                 break;
2222         case AF_INET6:
2223                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2224                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2225                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2226                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2227                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2228                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2229                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2230                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2231                 key[8]  = dst->ip6.sin6_port;
2232                 key[9]  = src->ip6.sin6_port;
2233                 break;
2234         default:
2235                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2236                 return key;
2237         }
2238
2239         return key;
2240 }
2241
2242 /*
2243   called when we get a read event on the raw socket
2244  */
2245 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2246                                 uint16_t flags, void *private_data)
2247 {
2248         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2249         struct ctdb_killtcp_con *con;
2250         ctdb_sock_addr src, dst;
2251         uint32_t ack_seq, seq;
2252
2253         if (!(flags & EVENT_FD_READ)) {
2254                 return;
2255         }
2256
2257         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2258                                 killtcp->private_data,
2259                                 &src, &dst,
2260                                 &ack_seq, &seq) != 0) {
2261                 /* probably a non-tcp ACK packet */
2262                 return;
2263         }
2264
2265         /* check if we have this guy in our list of connections
2266            to kill
2267         */
2268         con = trbt_lookuparray32(killtcp->connections, 
2269                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2270         if (con == NULL) {
2271                 /* no this was some other packet we can just ignore */
2272                 return;
2273         }
2274
2275         /* This one has been tickled !
2276            now reset him and remove him from the list.
2277          */
2278         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2279                 ntohs(con->dst_addr.ip.sin_port),
2280                 ctdb_addr_to_str(&con->src_addr),
2281                 ntohs(con->src_addr.ip.sin_port)));
2282
2283         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2284         talloc_free(con);
2285 }
2286
2287
2288 /* when traversing the list of all tcp connections to send tickle acks to
2289    (so that we can capture the ack coming back and kill the connection
2290     by a RST)
2291    this callback is called for each connection we are currently trying to kill
2292 */
2293 static void tickle_connection_traverse(void *param, void *data)
2294 {
2295         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2296
2297         /* have tried too many times, just give up */
2298         if (con->count >= 5) {
2299                 /* can't delete in traverse: reparent to delete_cons */
2300                 talloc_steal(param, con);
2301                 return;
2302         }
2303
2304         /* othervise, try tickling it again */
2305         con->count++;
2306         ctdb_sys_send_tcp(
2307                 (ctdb_sock_addr *)&con->dst_addr,
2308                 (ctdb_sock_addr *)&con->src_addr,
2309                 0, 0, 0);
2310 }
2311
2312
2313 /* 
2314    called every second until all sentenced connections have been reset
2315  */
2316 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2317                                               struct timeval t, void *private_data)
2318 {
2319         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2320         void *delete_cons = talloc_new(NULL);
2321
2322         /* loop over all connections sending tickle ACKs */
2323         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2324
2325         /* now we've finished traverse, it's safe to do deletion. */
2326         talloc_free(delete_cons);
2327
2328         /* If there are no more connections to kill we can remove the
2329            entire killtcp structure
2330          */
2331         if ( (killtcp->connections == NULL) || 
2332              (killtcp->connections->root == NULL) ) {
2333                 talloc_free(killtcp);
2334                 return;
2335         }
2336
2337         /* try tickling them again in a seconds time
2338          */
2339         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2340                         ctdb_tickle_sentenced_connections, killtcp);
2341 }
2342
2343 /*
2344   destroy the killtcp structure
2345  */
2346 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2347 {
2348         if (killtcp->vnn) {
2349                 killtcp->vnn->killtcp = NULL;
2350         }
2351         return 0;
2352 }
2353
2354
/* insert callback for the killtcp tree.
   'parm' is the new connection structure, 'data' whatever the tree
   already holds for that key. nothing fancy: the new structure always
   wins and is returned as is.

   the old structure is deliberately not freed here; per the original
   note it stays attached to the same tree node and goes away together
   with the new data.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
2366
2367 /*
2368   add a tcp socket to the list of connections we want to RST
2369  */
2370 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2371                                        ctdb_sock_addr *s,
2372                                        ctdb_sock_addr *d)
2373 {
2374         ctdb_sock_addr src, dst;
2375         struct ctdb_kill_tcp *killtcp;
2376         struct ctdb_killtcp_con *con;
2377         struct ctdb_vnn *vnn;
2378
2379         ctdb_canonicalize_ip(s, &src);
2380         ctdb_canonicalize_ip(d, &dst);
2381
2382         vnn = find_public_ip_vnn(ctdb, &dst);
2383         if (vnn == NULL) {
2384                 vnn = find_public_ip_vnn(ctdb, &src);
2385         }
2386         if (vnn == NULL) {
2387                 /* if it is not a public ip   it could be our 'single ip' */
2388                 if (ctdb->single_ip_vnn) {
2389                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2390                                 vnn = ctdb->single_ip_vnn;
2391                         }
2392                 }
2393         }
2394         if (vnn == NULL) {
2395                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2396                 return -1;
2397         }
2398
2399         killtcp = vnn->killtcp;
2400         
2401         /* If this is the first connection to kill we must allocate
2402            a new structure
2403          */
2404         if (killtcp == NULL) {
2405                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2406                 CTDB_NO_MEMORY(ctdb, killtcp);
2407
2408                 killtcp->vnn         = vnn;
2409                 killtcp->ctdb        = ctdb;
2410                 killtcp->capture_fd  = -1;
2411                 killtcp->connections = trbt_create(killtcp, 0);
2412
2413                 vnn->killtcp         = killtcp;
2414                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2415         }
2416
2417
2418
2419         /* create a structure that describes this connection we want to
2420            RST and store it in killtcp->connections
2421         */
2422         con = talloc(killtcp, struct ctdb_killtcp_con);
2423         CTDB_NO_MEMORY(ctdb, con);
2424         con->src_addr = src;
2425         con->dst_addr = dst;
2426         con->count    = 0;
2427         con->killtcp  = killtcp;
2428
2429
2430         trbt_insertarray32_callback(killtcp->connections,
2431                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2432                         add_killtcp_callback, con);
2433
2434         /* 
2435            If we dont have a socket to listen on yet we must create it
2436          */
2437         if (killtcp->capture_fd == -1) {
2438                 const char *iface = ctdb_vnn_iface_string(vnn);
2439                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2440                 if (killtcp->capture_fd == -1) {
2441                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2442                                           "socket on iface '%s' for killtcp (%s)\n",
2443                                           iface, strerror(errno)));
2444                         goto failed;
2445                 }
2446         }
2447
2448
2449         if (killtcp->fde == NULL) {
2450                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2451                                             EVENT_FD_READ,
2452                                             capture_tcp_handler, killtcp);
2453                 tevent_fd_set_auto_close(killtcp->fde);
2454
2455                 /* We also need to set up some events to tickle all these connections
2456                    until they are all reset
2457                 */
2458                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2459                                 ctdb_tickle_sentenced_connections, killtcp);
2460         }
2461
2462         /* tickle him once now */
2463         ctdb_sys_send_tcp(
2464                 &con->dst_addr,
2465                 &con->src_addr,
2466                 0, 0, 0);
2467
2468         return 0;
2469
2470 failed:
2471         talloc_free(vnn->killtcp);
2472         vnn->killtcp = NULL;
2473         return -1;
2474 }
2475
2476 /*
2477   kill a TCP connection.
2478  */
2479 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2480 {
2481         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2482
2483         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2484 }
2485
2486 /*
2487   called by a daemon to inform us of the entire list of TCP tickles for
2488   a particular public address.
2489   this control should only be sent by the node that is currently serving
2490   that public address.
2491  */
2492 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2493 {
2494         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2495         struct ctdb_tcp_array *tcparray;
2496         struct ctdb_vnn *vnn;
2497
2498         /* We must at least have tickles.num or else we cant verify the size
2499            of the received data blob
2500          */
2501         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2502                                         tickles.connections)) {
2503                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2504                 return -1;
2505         }
2506
2507         /* verify that the size of data matches what we expect */
2508         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2509                                 tickles.connections)
2510                          + sizeof(struct ctdb_tcp_connection)
2511                                  * list->tickles.num) {
2512                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2513                 return -1;
2514         }       
2515
2516         vnn = find_public_ip_vnn(ctdb, &list->addr);
2517         if (vnn == NULL) {
2518                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2519                         ctdb_addr_to_str(&list->addr)));
2520
2521                 return 1;
2522         }
2523
2524         /* remove any old ticklelist we might have */
2525         talloc_free(vnn->tcp_array);
2526         vnn->tcp_array = NULL;
2527
2528         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2529         CTDB_NO_MEMORY(ctdb, tcparray);
2530
2531         tcparray->num = list->tickles.num;
2532
2533         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2534         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2535
2536         memcpy(tcparray->connections, &list->tickles.connections[0], 
2537                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2538
2539         /* We now have a new fresh tickle list array for this vnn */
2540         vnn->tcp_array = talloc_steal(vnn, tcparray);
2541         
2542         return 0;
2543 }
2544
2545 /*
2546   called to return the full list of tickles for the puclic address associated 
2547   with the provided vnn
2548  */
2549 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2550 {
2551         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2552         struct ctdb_control_tcp_tickle_list *list;
2553         struct ctdb_tcp_array *tcparray;
2554         int num;
2555         struct ctdb_vnn *vnn;
2556
2557         vnn = find_public_ip_vnn(ctdb, addr);
2558         if (vnn == NULL) {
2559                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2560                         ctdb_addr_to_str(addr)));
2561
2562                 return 1;
2563         }
2564
2565         tcparray = vnn->tcp_array;
2566         if (tcparray) {
2567                 num = tcparray->num;
2568         } else {
2569                 num = 0;
2570         }
2571
2572         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2573                                 tickles.connections)
2574                         + sizeof(struct ctdb_tcp_connection) * num;
2575
2576         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2577         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2578         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2579
2580         list->addr = *addr;
2581         list->tickles.num = num;
2582         if (num) {
2583                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2584                         sizeof(struct ctdb_tcp_connection) * num);
2585         }
2586
2587         return 0;
2588 }
2589
2590
2591 /*
2592   set the list of all tcp tickles for a public address
2593  */
2594 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2595                               struct timeval timeout, uint32_t destnode, 
2596                               ctdb_sock_addr *addr,
2597                               struct ctdb_tcp_array *tcparray)
2598 {
2599         int ret, num;
2600         TDB_DATA data;
2601         struct ctdb_control_tcp_tickle_list *list;
2602
2603         if (tcparray) {
2604                 num = tcparray->num;
2605         } else {
2606                 num = 0;
2607         }
2608
2609         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2610                                 tickles.connections) +
2611                         sizeof(struct ctdb_tcp_connection) * num;
2612         data.dptr = talloc_size(ctdb, data.dsize);
2613         CTDB_NO_MEMORY(ctdb, data.dptr);
2614
2615         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2616         list->addr = *addr;
2617         list->tickles.num = num;
2618         if (tcparray) {
2619                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2620         }
2621
2622         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2623                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2624                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2625         if (ret != 0) {
2626                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2627                 return -1;
2628         }
2629
2630         talloc_free(data.dptr);
2631
2632         return ret;
2633 }
2634
2635
2636 /*
2637   perform tickle updates if required
2638  */
2639 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2640                                 struct timed_event *te, 
2641                                 struct timeval t, void *private_data)
2642 {
2643         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2644         int ret;
2645         struct ctdb_vnn *vnn;
2646
2647         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2648                 /* we only send out updates for public addresses that 
2649                    we have taken over
2650                  */
2651                 if (ctdb->pnn != vnn->pnn) {
2652                         continue;
2653                 }
2654                 /* We only send out the updates if we need to */
2655                 if (!vnn->tcp_update_needed) {
2656                         continue;
2657                 }
2658                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2659                                 TAKEOVER_TIMEOUT(),
2660                                 CTDB_BROADCAST_CONNECTED,
2661                                 &vnn->public_address,
2662                                 vnn->tcp_array);
2663                 if (ret != 0) {
2664                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2665                                 ctdb_addr_to_str(&vnn->public_address)));
2666                 }
2667         }
2668
2669         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2670                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2671                              ctdb_update_tcp_tickles, ctdb);
2672 }               
2673         
2674
2675 /*
2676   start periodic update of tcp tickles
2677  */
2678 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2679 {
2680         ctdb->tickle_update_context = talloc_new(ctdb);
2681
2682         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2683                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2684                              ctdb_update_tcp_tickles, ctdb);
2685 }
2686
2687
2688
2689
2690 struct control_gratious_arp {
2691         struct ctdb_context *ctdb;
2692         ctdb_sock_addr addr;
2693         const char *iface;
2694         int count;
2695 };
2696
2697 /*
2698   send a control_gratuitous arp
2699  */
2700 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2701                                   struct timeval t, void *private_data)
2702 {
2703         int ret;
2704         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2705                                                         struct control_gratious_arp);
2706
2707         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2708         if (ret != 0) {
2709                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2710                                  arp->iface, strerror(errno)));
2711         }
2712
2713
2714         arp->count++;
2715         if (arp->count == CTDB_ARP_REPEAT) {
2716                 talloc_free(arp);
2717                 return;
2718         }
2719
2720         event_add_timed(arp->ctdb->ev, arp, 
2721                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2722                         send_gratious_arp, arp);
2723 }
2724
2725
2726 /*
2727   send a gratious arp 
2728  */
2729 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2730 {
2731         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2732         struct control_gratious_arp *arp;
2733
2734         /* verify the size of indata */
2735         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2736                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2737                                  (unsigned)indata.dsize, 
2738                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2739                 return -1;
2740         }
2741         if (indata.dsize != 
2742                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2743                 + gratious_arp->len ) ){
2744
2745                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2746                         "but should be %u bytes\n", 
2747                          (unsigned)indata.dsize, 
2748                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2749                 return -1;
2750         }
2751
2752
2753         arp = talloc(ctdb, struct control_gratious_arp);
2754         CTDB_NO_MEMORY(ctdb, arp);
2755
2756         arp->ctdb  = ctdb;
2757         arp->addr   = gratious_arp->addr;
2758         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2759         CTDB_NO_MEMORY(ctdb, arp->iface);
2760         arp->count = 0;
2761         
2762         event_add_timed(arp->ctdb->ev, arp, 
2763                         timeval_zero(), send_gratious_arp, arp);
2764
2765         return 0;
2766 }
2767
2768 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2769 {
2770         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2771         int ret;
2772
2773         /* verify the size of indata */
2774         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2775                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2776                 return -1;
2777         }
2778         if (indata.dsize != 
2779                 ( offsetof(struct ctdb_control_ip_iface, iface)
2780                 + pub->len ) ){
2781
2782                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2783                         "but should be %u bytes\n", 
2784                          (unsigned)indata.dsize, 
2785                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2786                 return -1;
2787         }
2788
2789         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2790
2791         if (ret != 0) {
2792                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2793                 return -1;
2794         }
2795
2796         return 0;
2797 }
2798
2799 /*
2800   called when releaseip event finishes for del_public_address
2801  */
2802 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2803                                 void *private_data)
2804 {
2805         talloc_free(private_data);
2806 }
2807
2808 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2809 {
2810         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2811         struct ctdb_vnn *vnn;
2812         int ret;
2813
2814         /* verify the size of indata */
2815         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2816                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2817                 return -1;
2818         }
2819         if (indata.dsize != 
2820                 ( offsetof(struct ctdb_control_ip_iface, iface)
2821                 + pub->len ) ){
2822
2823                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2824                         "but should be %u bytes\n", 
2825                          (unsigned)indata.dsize, 
2826                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2827                 return -1;
2828         }
2829
2830         /* walk over all public addresses until we find a match */
2831         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2832                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2833                         TALLOC_CTX *mem_ctx;
2834
2835                         DLIST_REMOVE(ctdb->vnn, vnn);
2836                         if (vnn->iface == NULL) {
2837                                 talloc_free(vnn);
2838                                 return 0;
2839                         }
2840
2841                         mem_ctx = talloc_new(ctdb);
2842                         ret = ctdb_event_script_callback(ctdb, 
2843                                          mem_ctx, delete_ip_callback, mem_ctx,
2844                                          false,
2845                                          CTDB_EVENT_RELEASE_IP,
2846                                          "%s %s %u",
2847                                          ctdb_vnn_iface_string(vnn),
2848                                          ctdb_addr_to_str(&vnn->public_address),
2849                                          vnn->public_netmask_bits);
2850                         ctdb_vnn_unassign_iface(ctdb, vnn);
2851                         talloc_free(vnn);
2852                         if (ret != 0) {
2853                                 return -1;
2854                         }
2855                         return 0;
2856                 }
2857         }
2858
2859         return -1;
2860 }
2861
2862 /* This function is called from the recovery daemon to verify that a remote
2863    node has the expected ip allocation.
2864    This is verified against ctdb->ip_tree
2865 */
2866 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2867 {
2868         struct ctdb_public_ip_list *tmp_ip; 
2869         int i;
2870
2871         if (ctdb->ip_tree == NULL) {
2872                 /* dont know the expected allocation yet, assume remote node
2873                    is correct. */
2874                 return 0;
2875         }
2876
2877         if (ips == NULL) {
2878                 return 0;
2879         }
2880
2881         for (i=0; i<ips->num; i++) {
2882                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2883                 if (tmp_ip == NULL) {
2884                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2885                         return -1;
2886                 }
2887
2888                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2889                         continue;
2890                 }
2891
2892                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2893                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2894                         return -1;
2895                 }
2896         }
2897
2898         return 0;
2899 }
2900
2901 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2902 {
2903         struct ctdb_public_ip_list *tmp_ip; 
2904
2905         if (ctdb->ip_tree == NULL) {
2906                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2907                 return -1;
2908         }
2909
2910         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2911         if (tmp_ip == NULL) {
2912                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2913                 return -1;
2914         }
2915
2916         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2917         tmp_ip->pnn = ip->pnn;
2918
2919         return 0;
2920 }