The recent change to the recovery daemon to keep track of and
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/*
  Timeout built from the takeover_timeout tunable.  The expansion refers
  to a variable named `ctdb`, which must be in scope at the point of use.
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/*
  Gratuitous ARP pacing: CTDB_ARP_INTERVAL is the first argument given
  to timeval_current_ofs() when the next round is scheduled, and the
  announcement state is released once CTDB_ARP_REPEAT rounds were sent.
 */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface that public addresses may be placed on
 */
struct ctdb_iface {
        /* doubly linked list membership (list head is ctdb->ifaces) */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name; lookups compare it with strcmp() */
        const char *name;
        /* interfaces without link are skipped when picking a home for an ip */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* the public address entry being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* interface the address is moved away from; restored on failure */
        struct ctdb_iface *old;
        /* the public address entry being moved */
        struct ctdb_vnn *vnn;
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
547 /*
548   take over an ip address
549  */
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551                                  struct ctdb_req_control *c,
552                                  TDB_DATA indata,
553                                  bool *async_reply)
554 {
555         int ret;
556         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557         struct ctdb_vnn *vnn;
558         bool have_ip = false;
559         bool do_updateip = false;
560         bool do_takeip = false;
561         struct ctdb_iface *best_iface = NULL;
562
563         if (pip->pnn != ctdb->pnn) {
564                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565                                  "with pnn %d, but we're node %d\n",
566                                  ctdb_addr_to_str(&pip->addr),
567                                  pip->pnn, ctdb->pnn));
568                 return -1;
569         }
570
571         /* update out vnn list */
572         vnn = find_public_ip_vnn(ctdb, &pip->addr);
573         if (vnn == NULL) {
574                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575                         ctdb_addr_to_str(&pip->addr)));
576                 return 0;
577         }
578
579         have_ip = ctdb_sys_have_ip(&pip->addr);
580         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581         if (best_iface == NULL) {
582                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583                                  "a usable interface (old %s, have_ip %d)\n",
584                                  ctdb_addr_to_str(&vnn->public_address),
585                                  vnn->public_netmask_bits,
586                                  ctdb_vnn_iface_string(vnn),
587                                  have_ip));
588                 return -1;
589         }
590
591         if (vnn->iface == NULL && have_ip) {
592                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
593                                   "but we have no interface assigned, has someone manually configured it?"
594                                   "banning ourself\n",
595                                  ctdb_addr_to_str(&vnn->public_address)));
596                 ctdb_ban_self(ctdb);
597                 return -1;
598         }
599
600         if (vnn->pnn != ctdb->pnn && have_ip) {
601                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
602                                   "and we have it on iface[%s], but it was assigned to node %d"
603                                   "and we are node %d, banning ourself\n",
604                                  ctdb_addr_to_str(&vnn->public_address),
605                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
606                 ctdb_ban_self(ctdb);
607                 return -1;
608         }
609
610         if (vnn->iface) {
611                 if (vnn->iface->link_up) {
612                         /* only move when the rebalance gains something */
613                         if (vnn->iface->references > (best_iface->references + 1)) {
614                                 do_updateip = true;
615                         }
616                 } else if (vnn->iface != best_iface) {
617                         do_updateip = true;
618                 }
619         }
620
621         if (!have_ip) {
622                 if (do_updateip) {
623                         ctdb_vnn_unassign_iface(ctdb, vnn);
624                         do_updateip = false;
625                 }
626                 do_takeip = true;
627         }
628
629         if (do_takeip) {
630                 ret = ctdb_do_takeip(ctdb, c, vnn);
631                 if (ret != 0) {
632                         return -1;
633                 }
634         } else if (do_updateip) {
635                 ret = ctdb_do_updateip(ctdb, c, vnn);
636                 if (ret != 0) {
637                         return -1;
638                 }
639         } else {
640                 /*
641                  * The interface is up and the kernel known the ip
642                  * => do nothing
643                  */
644                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
645                         ctdb_addr_to_str(&pip->addr),
646                         vnn->public_netmask_bits,
647                         ctdb_vnn_iface_string(vnn)));
648                 return 0;
649         }
650
651         /* tell ctdb_control.c that we will be replying asynchronously */
652         *async_reply = true;
653
654         return 0;
655 }
656
657 /*
658   takeover an ip address old v4 style
659  */
660 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
661                                 struct ctdb_req_control *c,
662                                 TDB_DATA indata, 
663                                 bool *async_reply)
664 {
665         TDB_DATA data;
666         
667         data.dsize = sizeof(struct ctdb_public_ip);
668         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
669         CTDB_NO_MEMORY(ctdb, data.dptr);
670         
671         memcpy(data.dptr, indata.dptr, indata.dsize);
672         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
673 }
674
675 /*
676   kill any clients that are registered with a IP that is being released
677  */
678 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
679 {
680         struct ctdb_client_ip *ip;
681
682         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
683                 ctdb_addr_to_str(addr)));
684
685         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
686                 ctdb_sock_addr tmp_addr;
687
688                 tmp_addr = ip->addr;
689                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
690                         ip->client_id,
691                         ctdb_addr_to_str(&ip->addr)));
692
693                 if (ctdb_same_ip(&tmp_addr, addr)) {
694                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
695                                                                      ip->client_id, 
696                                                                      struct ctdb_client);
697                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
698                                 ip->client_id,
699                                 ctdb_addr_to_str(&ip->addr),
700                                 client->pid));
701
702                         if (client->pid != 0) {
703                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
704                                         (unsigned)client->pid,
705                                         ctdb_addr_to_str(addr),
706                                         ip->client_id));
707                                 kill(client->pid, SIGKILL);
708                         }
709                 }
710         }
711 }
712
713 /*
714   called when releaseip event finishes
715  */
716 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
717                                 void *private_data)
718 {
719         struct takeover_callback_state *state = 
720                 talloc_get_type(private_data, struct takeover_callback_state);
721         TDB_DATA data;
722
723         if (status == -ETIME) {
724                 ctdb_ban_self(ctdb);
725         }
726
727         /* send a message to all clients of this node telling them
728            that the cluster has been reconfigured and they should
729            release any sockets on this IP */
730         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
731         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
732         data.dsize = strlen((char *)data.dptr)+1;
733
734         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
735
736         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
737
738         /* kill clients that have registered with this IP */
739         release_kill_clients(ctdb, state->addr);
740
741         ctdb_vnn_unassign_iface(ctdb, state->vnn);
742
743         /* the control succeeded */
744         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
745         talloc_free(state);
746 }
747
748 /*
749   release an ip address
750  */
751 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
752                                 struct ctdb_req_control *c,
753                                 TDB_DATA indata, 
754                                 bool *async_reply)
755 {
756         int ret;
757         struct takeover_callback_state *state;
758         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
759         struct ctdb_vnn *vnn;
760
761         /* update our vnn list */
762         vnn = find_public_ip_vnn(ctdb, &pip->addr);
763         if (vnn == NULL) {
764                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
765                         ctdb_addr_to_str(&pip->addr)));
766                 return 0;
767         }
768         vnn->pnn = pip->pnn;
769
770         /* stop any previous arps */
771         talloc_free(vnn->takeover_ctx);
772         vnn->takeover_ctx = NULL;
773
774         if (!ctdb_sys_have_ip(&pip->addr)) {
775                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
776                         ctdb_addr_to_str(&pip->addr),
777                         vnn->public_netmask_bits, 
778                         ctdb_vnn_iface_string(vnn)));
779                 ctdb_vnn_unassign_iface(ctdb, vnn);
780                 return 0;
781         }
782
783         if (vnn->iface == NULL) {
784                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
785                                   "but we have no interface assigned, has someone manually configured it?"
786                                   "banning ourself\n",
787                                  ctdb_addr_to_str(&vnn->public_address)));
788                 ctdb_ban_self(ctdb);
789                 return -1;
790         }
791
792         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
793                 ctdb_addr_to_str(&pip->addr),
794                 vnn->public_netmask_bits, 
795                 ctdb_vnn_iface_string(vnn),
796                 pip->pnn));
797
798         state = talloc(ctdb, struct takeover_callback_state);
799         CTDB_NO_MEMORY(ctdb, state);
800
801         state->c = talloc_steal(state, c);
802         state->addr = talloc(state, ctdb_sock_addr);       
803         CTDB_NO_MEMORY(ctdb, state->addr);
804         *state->addr = pip->addr;
805         state->vnn   = vnn;
806
807         ret = ctdb_event_script_callback(ctdb, 
808                                          state, release_ip_callback, state,
809                                          false,
810                                          CTDB_EVENT_RELEASE_IP,
811                                          "%s %s %u",
812                                          ctdb_vnn_iface_string(vnn),
813                                          ctdb_addr_to_str(&pip->addr),
814                                          vnn->public_netmask_bits);
815         if (ret != 0) {
816                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
817                         ctdb_addr_to_str(&pip->addr),
818                         ctdb_vnn_iface_string(vnn)));
819                 talloc_free(state);
820                 return -1;
821         }
822
823         /* tell the control that we will be reply asynchronously */
824         *async_reply = true;
825         return 0;
826 }
827
828 /*
829   release an ip address old v4 style
830  */
831 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
832                                 struct ctdb_req_control *c,
833                                 TDB_DATA indata, 
834                                 bool *async_reply)
835 {
836         TDB_DATA data;
837         
838         data.dsize = sizeof(struct ctdb_public_ip);
839         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
840         CTDB_NO_MEMORY(ctdb, data.dptr);
841         
842         memcpy(data.dptr, indata.dptr, indata.dsize);
843         return ctdb_control_release_ip(ctdb, c, data, async_reply);
844 }
845
846
847 static int ctdb_add_public_address(struct ctdb_context *ctdb,
848                                    ctdb_sock_addr *addr,
849                                    unsigned mask, const char *ifaces)
850 {
851         struct ctdb_vnn      *vnn;
852         uint32_t num = 0;
853         char *tmp;
854         const char *iface;
855         int i;
856         int ret;
857
858         /* Verify that we dont have an entry for this ip yet */
859         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
860                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
861                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
862                                 ctdb_addr_to_str(addr)));
863                         return -1;
864                 }               
865         }
866
867         /* create a new vnn structure for this ip address */
868         vnn = talloc_zero(ctdb, struct ctdb_vnn);
869         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
870         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
871         tmp = talloc_strdup(vnn, ifaces);
872         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
873         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
874                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
875                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
876                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
877                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
878                 num++;
879         }
880         talloc_free(tmp);
881         vnn->ifaces[num] = NULL;
882         vnn->public_address      = *addr;
883         vnn->public_netmask_bits = mask;
884         vnn->pnn                 = -1;
885
886         for (i=0; vnn->ifaces[i]; i++) {
887                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
888                 if (ret != 0) {
889                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
890                                            "for public_address[%s]\n",
891                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
892                         talloc_free(vnn);
893                         return -1;
894                 }
895         }
896
897         DLIST_ADD(ctdb->vnn, vnn);
898
899         return 0;
900 }
901
902 /*
903   setup the event script directory
904 */
905 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
906 {
907         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
908         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
909         return 0;
910 }
911
912 /*
913   setup the public address lists from a file
914 */
915 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
916 {
917         char **lines;
918         int nlines;
919         int i;
920
921         lines = file_lines_load(alist, &nlines, ctdb);
922         if (lines == NULL) {
923                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
924                 return -1;
925         }
926         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
927                 nlines--;
928         }
929
930         for (i=0;i<nlines;i++) {
931                 unsigned mask;
932                 ctdb_sock_addr addr;
933                 const char *addrstr;
934                 const char *ifaces;
935                 char *tok, *line;
936
937                 line = lines[i];
938                 while ((*line == ' ') || (*line == '\t')) {
939                         line++;
940                 }
941                 if (*line == '#') {
942                         continue;
943                 }
944                 if (strcmp(line, "") == 0) {
945                         continue;
946                 }
947                 tok = strtok(line, " \t");
948                 addrstr = tok;
949                 tok = strtok(NULL, " \t");
950                 if (tok == NULL) {
951                         if (NULL == ctdb->default_public_interface) {
952                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
953                                          i+1));
954                                 talloc_free(lines);
955                                 return -1;
956                         }
957                         ifaces = ctdb->default_public_interface;
958                 } else {
959                         ifaces = tok;
960                 }
961
962                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
963                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
964                         talloc_free(lines);
965                         return -1;
966                 }
967                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
968                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
969                         talloc_free(lines);
970                         return -1;
971                 }
972         }
973
974         talloc_free(lines);
975         return 0;
976 }
977
978 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
979                               const char *iface,
980                               const char *ip)
981 {
982         struct ctdb_vnn *svnn;
983         bool ok;
984         int ret;
985
986         svnn = talloc_zero(ctdb, struct ctdb_vnn);
987         CTDB_NO_MEMORY(ctdb, svnn);
988
989         svnn->ifaces = talloc_array(svnn, const char *, 2);
990         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
991         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
992         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
993         svnn->ifaces[1] = NULL;
994
995         ok = parse_ip(ip, iface, 0, &svnn->public_address);
996         if (!ok) {
997                 talloc_free(svnn);
998                 return -1;
999         }
1000
1001         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1002         if (ret != 0) {
1003                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1004                                    "for single_ip[%s]\n",
1005                                    svnn->ifaces[0],
1006                                    ctdb_addr_to_str(&svnn->public_address)));
1007                 talloc_free(svnn);
1008                 return -1;
1009         }
1010
1011         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1012         if (ret != 0) {
1013                 talloc_free(svnn);
1014                 return -1;
1015         }
1016
1017         ctdb->single_ip_vnn = svnn;
1018         return 0;
1019 }
1020
/* one public address in the merged list of public addresses that
   ctdb_takeover_run() distributes over the nodes */
1021 struct ctdb_public_ip_list {
1022         struct ctdb_public_ip_list *next; /* next entry, NULL ends the list */
1023         uint32_t pnn; /* node assigned to serve addr; this file compares it with -1 for "no node" */
1024         ctdb_sock_addr addr; /* the public address itself */
1025 };
1026
1027
1028 /* Given a physical node, return the number of
1029    public addresses that is currently assigned to this node.
1030 */
1031 static int node_ip_coverage(struct ctdb_context *ctdb, 
1032         int32_t pnn,
1033         struct ctdb_public_ip_list *ips)
1034 {
1035         int num=0;
1036
1037         for (;ips;ips=ips->next) {
1038                 if (ips->pnn == pnn) {
1039                         num++;
1040                 }
1041         }
1042         return num;
1043 }
1044
1045
1046 /* Check if this is a public ip known to the node, i.e. can that
1047    node takeover this ip ?
1048 */
1049 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1050                 struct ctdb_public_ip_list *ip)
1051 {
1052         struct ctdb_all_public_ips *public_ips;
1053         int i;
1054
1055         public_ips = ctdb->nodes[pnn]->available_public_ips;
1056
1057         if (public_ips == NULL) {
1058                 return -1;
1059         }
1060
1061         for (i=0;i<public_ips->num;i++) {
1062                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1063                         /* yes, this node can serve this public ip */
1064                         return 0;
1065                 }
1066         }
1067
1068         return -1;
1069 }
1070
1071
1072 /* search the node lists list for a node to takeover this ip.
1073    pick the node that currently are serving the least number of ips
1074    so that the ips get spread out evenly.
1075 */
1076 static int find_takeover_node(struct ctdb_context *ctdb, 
1077                 struct ctdb_node_map *nodemap, uint32_t mask, 
1078                 struct ctdb_public_ip_list *ip,
1079                 struct ctdb_public_ip_list *all_ips)
1080 {
1081         int pnn, min=0, num;
1082         int i;
1083
1084         pnn    = -1;
1085         for (i=0;i<nodemap->num;i++) {
1086                 if (nodemap->nodes[i].flags & mask) {
1087                         /* This node is not healty and can not be used to serve
1088                            a public address 
1089                         */
1090                         continue;
1091                 }
1092
1093                 /* verify that this node can serve this ip */
1094                 if (can_node_serve_ip(ctdb, i, ip)) {
1095                         /* no it couldnt   so skip to the next node */
1096                         continue;
1097                 }
1098
1099                 num = node_ip_coverage(ctdb, i, all_ips);
1100                 /* was this the first node we checked ? */
1101                 if (pnn == -1) {
1102                         pnn = i;
1103                         min  = num;
1104                 } else {
1105                         if (num < min) {
1106                                 pnn = i;
1107                                 min  = num;
1108                         }
1109                 }
1110         }       
1111         if (pnn == -1) {
1112                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1113                         ctdb_addr_to_str(&ip->addr)));
1114
1115                 return -1;
1116         }
1117
1118         ip->pnn = pnn;
1119         return 0;
1120 }
1121
1122 #define IP_KEYLEN       4
1123 static uint32_t *ip_key(ctdb_sock_addr *ip)
1124 {
1125         static uint32_t key[IP_KEYLEN];
1126
1127         bzero(key, sizeof(key));
1128
1129         switch (ip->sa.sa_family) {
1130         case AF_INET:
1131                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1132                 break;
1133         case AF_INET6:
1134                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1135                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1136                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1137                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1138                 break;
1139         default:
1140                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1141                 return key;
1142         }
1143
1144         return key;
1145 }
1146
/*
  callback passed to trbt_insertarray32_callback() by
  create_merged_ip_list(): ignores data and returns parm unchanged
 */
static void *add_ip_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
1151
1152 void getips_count_callback(void *param, void *data)
1153 {
1154         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1155         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1156
1157         new_ip->next = *ip_list;
1158         *ip_list     = new_ip;
1159 }
1160
1161 static struct ctdb_public_ip_list *
1162 create_merged_ip_list(struct ctdb_context *ctdb)
1163 {
1164         int i, j;
1165         struct ctdb_public_ip_list *ip_list;
1166         struct ctdb_all_public_ips *public_ips;
1167
1168         if (ctdb->ip_tree != NULL) {
1169                 talloc_free(ctdb->ip_tree);
1170                 ctdb->ip_tree = NULL;
1171         }
1172         ctdb->ip_tree = trbt_create(ctdb, 0);
1173
1174         for (i=0;i<ctdb->num_nodes;i++) {
1175                 public_ips = ctdb->nodes[i]->known_public_ips;
1176
1177                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1178                         continue;
1179                 }
1180
1181                 /* there were no public ips for this node */
1182                 if (public_ips == NULL) {
1183                         continue;
1184                 }               
1185
1186                 for (j=0;j<public_ips->num;j++) {
1187                         struct ctdb_public_ip_list *tmp_ip; 
1188
1189                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1190                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1191                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1192                         tmp_ip->addr = public_ips->ips[j].addr;
1193                         tmp_ip->next = NULL;
1194
1195                         trbt_insertarray32_callback(ctdb->ip_tree,
1196                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1197                                 add_ip_callback,
1198                                 tmp_ip);
1199                 }
1200         }
1201
1202         ip_list = NULL;
1203         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1204
1205         return ip_list;
1206 }
1207
1208 /*
1209   make any IP alias changes for public addresses that are necessary 

  Outline of the steps below:
   - pick the node flags that disqualify a node; disabled nodes are only
     used as a fallback when no node is completely healthy
   - build the merged list of all public addresses with
     create_merged_ip_list()
   - unassign (pnn = -1) every address whose node is masked out or can
     not serve it
   - give each unassigned address to a node via find_takeover_node()
   - unless NoIPFailback or DeterministicIPs is set, rebalance by
     unassigning one address of the busiest node and trying again, at
     most 5 times
   - send release controls to every connected node for the addresses it
     should not hold, wait for them, then send takeover controls to the
     nodes that own an address and wait again

  Returns 0 on success and -1 when a control can not be sent or one of
  the two async waits fails.
1210  */
1211 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1212 {
1213         int i, num_healthy, retries;
1214         struct ctdb_public_ip ip;
1215         struct ctdb_public_ipv4 ipv4;
1216         uint32_t mask;
1217         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1218         int maxnode, maxnum=0, minnode, minnum=0, num;
1219         TDB_DATA data;
1220         struct timeval timeout;
1221         struct client_async_data *async_data;
1222         struct ctdb_client_control_state *state;
1223         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1224
1225
1226         ZERO_STRUCT(ip);
1227
1228         /* Count how many completely healthy nodes we have */
1229         num_healthy = 0;
1230         for (i=0;i<nodemap->num;i++) {
1231                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1232                         num_healthy++;
1233                 }
1234         }
1235
1236         if (num_healthy > 0) {
1237                 /* We have healthy nodes, so only consider them for 
1238                    serving public addresses
1239                 */
1240                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1241         } else {
1242                 /* We didn't have any completely healthy nodes so
1243                    use "disabled" nodes as a fallback
1244                 */
1245                 mask = NODE_FLAGS_INACTIVE;
1246         }
1247
1248         /* since nodes only know about those public addresses that
1249            can be served by that particular node, no single node has
1250            a full list of all public addresses that exist in the cluster.
1251            Walk over all node structures and create a merged list of
1252            all public addresses that exist in the cluster.
1253
1254            keep the tree of ips around as ctdb->ip_tree
1255         */
1256         all_ips = create_merged_ip_list(ctdb);
1257
1258         /* If we want deterministic ip allocations, i.e. that the ip addresses
1259            will always be allocated the same way for a specific set of
1260            available/unavailable nodes.
           The addresses are handed out round robin over nodemap->num.
1261         */
1262         if (1 == ctdb->tunable.deterministic_public_ips) {              
1263                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1264                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1265                         tmp_ip->pnn = i%nodemap->num;
1266                 }
1267         }
1268
1269
1270         /* mark all public addresses with a masked node as being served by
1271            node -1
           NOTE(review): pnn is used as an index into nodemap->nodes without
           a range check here - confirm it is always below nodemap->num
1272         */
1273         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1274                 if (tmp_ip->pnn == -1) {
1275                         continue;
1276                 }
1277                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1278                         tmp_ip->pnn = -1;
1279                 }
1280         }
1281
1282         /* verify that the assigned nodes can serve that public ip
1283            and set it to -1 if not
1284         */
1285         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1286                 if (tmp_ip->pnn == -1) {
1287                         continue;
1288                 }
1289                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1290                         /* this node can not serve this ip. */
1291                         tmp_ip->pnn = -1;
1292                 }
1293         }
1294
1295
1296         /* now we must redistribute all public addresses with takeover node
1297            -1 among the nodes available
1298         */
1299         retries = 0;
1300 try_again:
1301         /* loop over all ip's and find a physical node to cover for 
1302            each unassigned ip.
1303         */
1304         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1305                 if (tmp_ip->pnn == -1) {
1306                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1307                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1308                                         ctdb_addr_to_str(&tmp_ip->addr)));
1309                         }
1310                 }
1311         }
1312
1313         /* If we don't want ips to fail back after a node becomes healthy
1314            again, we won't even try to reallocate the ip addresses so that
1315            they are evenly spread out.
1316            This can NOT be used at the same time as DeterministicIPs !
1317         */
1318         if (1 == ctdb->tunable.no_ip_failback) {
1319                 if (1 == ctdb->tunable.deterministic_public_ips) {
1320                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1321                 }
1322                 goto finished;
1323         }
1324
1325
1326         /* now, try to make sure the ip addresses are evenly distributed
1327            across the nodes.
1328            for each ip address, loop over all nodes that can serve this
1329            ip and make sure that the difference between the node
1330            serving the most and the node serving the least ip's are not greater
1331            than 1.
1332         */
1333         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1334                 if (tmp_ip->pnn == -1) {
1335                         continue;
1336                 }
1337
1338                 /* Get the highest and lowest number of ips's served by any 
1339                    valid node which can serve this ip.
1340                 */
1341                 maxnode = -1;
1342                 minnode = -1;
1343                 for (i=0;i<nodemap->num;i++) {
1344                         if (nodemap->nodes[i].flags & mask) {
1345                                 continue;
1346                         }
1347
1348                         /* only check nodes that can actually serve this ip */
1349                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1350                                 /* no it couldnt   so skip to the next node */
1351                                 continue;
1352                         }
1353
1354                         num = node_ip_coverage(ctdb, i, all_ips);
1355                         if (maxnode == -1) {
1356                                 maxnode = i;
1357                                 maxnum  = num;
1358                         } else {
1359                                 if (num > maxnum) {
1360                                         maxnode = i;
1361                                         maxnum  = num;
1362                                 }
1363                         }
1364                         if (minnode == -1) {
1365                                 minnode = i;
1366                                 minnum  = num;
1367                         } else {
1368                                 if (num < minnum) {
1369                                         minnode = i;
1370                                         minnum  = num;
1371                                 }
1372                         }
1373                 }
1374                 if (maxnode == -1) {
1375                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1376                                 ctdb_addr_to_str(&tmp_ip->addr)));
1377
1378                         continue;
1379                 }
1380
1381                 /* If we want deterministic IPs then don't try to reallocate 
1382                    them to spread out the load.
1383                 */
1384                 if (1 == ctdb->tunable.deterministic_public_ips) {
1385                         continue;
1386                 }
1387
1388                 /* if the spread between the smallest and largest coverage by
1389                    a node is >=2 we steal one of the ips from the node with
1390                    most coverage to even things out a bit.
1391                    try to do this at most 5 times  since we don't want to spend
1392                    too much time balancing the ip coverage.
1393                 */
1394                 if ( (maxnum > minnum+1)
1395                   && (retries < 5) ){
1396                         struct ctdb_public_ip_list *tmp;
1397
1398                         /* mark one of maxnode's vnn's as unassigned and try
1399                            again
                            (the first entry of all_ips held by maxnode is
                            the one that gets unassigned)
1400                         */
1401                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1402                                 if (tmp->pnn == maxnode) {
1403                                         tmp->pnn = -1;
1404                                         retries++;
1405                                         goto try_again;
1406                                 }
1407                         }
1408                 }
1409         }
1410
1411
1412         /* finished distributing the public addresses, now just send the 
1413            info out to the nodes
1414         */
1415 finished:
1416
1417         /* at this point ->pnn is the node which will own each IP
1418            or -1 if there is no node that can cover this ip
1419         */
1420
1421         /* now tell all nodes to delete any alias that they should not
1422            have.  This will be a NOOP on nodes that don't currently
1423            hold the given alias */
1424         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1425         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1426
1427         for (i=0;i<nodemap->num;i++) {
1428                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1429                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1430                         continue;
1431                 }
1432
1433                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1434                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1435                                 /* This node should be serving this
1436                                    vnn so don't tell it to release the ip
1437                                 */
1438                                 continue;
1439                         }
1440                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
                                 /* AF_INET addresses are sent with the old
                                    ipv4 structure and control */
1441                                 ipv4.pnn = tmp_ip->pnn;
1442                                 ipv4.sin = tmp_ip->addr.ip;
1443
1444                                 timeout = TAKEOVER_TIMEOUT();
1445                                 data.dsize = sizeof(ipv4);
1446                                 data.dptr  = (uint8_t *)&ipv4;
1447                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1448                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1449                                                 data, async_data,
1450                                                 &timeout, NULL);
1451                         } else {
1452                                 ip.pnn  = tmp_ip->pnn;
1453                                 ip.addr = tmp_ip->addr;
1454
1455                                 timeout = TAKEOVER_TIMEOUT();
1456                                 data.dsize = sizeof(ip);
1457                                 data.dptr  = (uint8_t *)&ip;
1458                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1459                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1460                                                 data, async_data,
1461                                                 &timeout, NULL);
1462                         }
1463
1464                         if (state == NULL) {
1465                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1466                                 talloc_free(tmp_ctx);
1467                                 return -1;
1468                         }
1469                 
1470                         ctdb_client_async_add(async_data, state);
1471                 }
1472         }
         /* wait for all release controls before any takeover is sent */
1473         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1474                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1475                 talloc_free(tmp_ctx);
1476                 return -1;
1477         }
1478         talloc_free(async_data);
1479
1480
1481         /* tell all nodes to get their own IPs */
1482         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1483         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1484         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1485                 if (tmp_ip->pnn == -1) {
1486                         /* this IP won't be taken over */
1487                         continue;
1488                 }
1489
1490                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1491                         ipv4.pnn = tmp_ip->pnn;
1492                         ipv4.sin = tmp_ip->addr.ip;
1493
1494                         timeout = TAKEOVER_TIMEOUT();
1495                         data.dsize = sizeof(ipv4);
1496                         data.dptr  = (uint8_t *)&ipv4;
1497                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1498                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1499                                         data, async_data,
1500                                         &timeout, NULL);
1501                 } else {
1502                         ip.pnn  = tmp_ip->pnn;
1503                         ip.addr = tmp_ip->addr;
1504
1505                         timeout = TAKEOVER_TIMEOUT();
1506                         data.dsize = sizeof(ip);
1507                         data.dptr  = (uint8_t *)&ip;
1508                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1509                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1510                                         data, async_data,
1511                                         &timeout, NULL);
1512                 }
1513                 if (state == NULL) {
1514                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1515                         talloc_free(tmp_ctx);
1516                         return -1;
1517                 }
1518                 
1519                 ctdb_client_async_add(async_data, state);
1520         }
1521         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1522                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1523                 talloc_free(tmp_ctx);
1524                 return -1;
1525         }
1526
1527         talloc_free(tmp_ctx);
1528         return 0;
1529 }
1530
1531
1532 /*
1533   destroy a ctdb_client_ip structure
1534  */
1535 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1536 {
1537         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1538                 ctdb_addr_to_str(&ip->addr),
1539                 ntohs(ip->addr.ip.sin_port),
1540                 ip->client_id));
1541
1542         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1543         return 0;
1544 }
1545
1546 /*
1547   called by a client to inform us of a TCP connection that it is managing
1548   that should be tickled with an ACK when IP takeover is done.
1549   we handle both the old ipv4 style of packets as well as the new ipv4/6
1550   pdus.
1551  */
1552 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1553                                 TDB_DATA indata)
1554 {
1555         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1556         struct ctdb_control_tcp *old_addr = NULL;
1557         struct ctdb_control_tcp_addr new_addr;
1558         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1559         struct ctdb_tcp_list *tcp;
1560         struct ctdb_control_tcp_vnn t;
1561         int ret;
1562         TDB_DATA data;
1563         struct ctdb_client_ip *ip;
1564         struct ctdb_vnn *vnn;
1565         ctdb_sock_addr addr;
1566
1567         switch (indata.dsize) {
1568         case sizeof(struct ctdb_control_tcp):
1569                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1570                 ZERO_STRUCT(new_addr);
1571                 tcp_sock = &new_addr;
1572                 tcp_sock->src.ip  = old_addr->src;
1573                 tcp_sock->dest.ip = old_addr->dest;
1574                 break;
1575         case sizeof(struct ctdb_control_tcp_addr):
1576                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1577                 break;
1578         default:
1579                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1580                                  "to ctdb_control_tcp_client. size was %d but "
1581                                  "only allowed sizes are %lu and %lu\n",
1582                                  (int)indata.dsize,
1583                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1584                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1585                 return -1;
1586         }
1587
1588         addr = tcp_sock->src;
1589         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1590         addr = tcp_sock->dest;
1591         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1592
1593         ZERO_STRUCT(addr);
1594         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1595         vnn = find_public_ip_vnn(ctdb, &addr);
1596         if (vnn == NULL) {
1597                 switch (addr.sa.sa_family) {
1598                 case AF_INET:
1599                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1600                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1601                                         ctdb_addr_to_str(&addr)));
1602                         }
1603                         break;
1604                 case AF_INET6:
1605                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1606                                 ctdb_addr_to_str(&addr)));
1607                         break;
1608                 default:
1609                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1610                 }
1611
1612                 return 0;
1613         }
1614
1615         if (vnn->pnn != ctdb->pnn) {
1616                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1617                         ctdb_addr_to_str(&addr),
1618                         client_id, client->pid));
1619                 /* failing this call will tell smbd to die */
1620                 return -1;
1621         }
1622
1623         ip = talloc(client, struct ctdb_client_ip);
1624         CTDB_NO_MEMORY(ctdb, ip);
1625
1626         ip->ctdb      = ctdb;
1627         ip->addr      = addr;
1628         ip->client_id = client_id;
1629         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1630         DLIST_ADD(ctdb->client_ip_list, ip);
1631
1632         tcp = talloc(client, struct ctdb_tcp_list);
1633         CTDB_NO_MEMORY(ctdb, tcp);
1634
1635         tcp->connection.src_addr = tcp_sock->src;
1636         tcp->connection.dst_addr = tcp_sock->dest;
1637
1638         DLIST_ADD(client->tcp_list, tcp);
1639
1640         t.src  = tcp_sock->src;
1641         t.dest = tcp_sock->dest;
1642
1643         data.dptr = (uint8_t *)&t;
1644         data.dsize = sizeof(t);
1645
1646         switch (addr.sa.sa_family) {
1647         case AF_INET:
1648                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1649                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1650                         ctdb_addr_to_str(&tcp_sock->src),
1651                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1652                 break;
1653         case AF_INET6:
1654                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1655                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1656                         ctdb_addr_to_str(&tcp_sock->src),
1657                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1658                 break;
1659         default:
1660                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1661         }
1662
1663
1664         /* tell all nodes about this tcp connection */
1665         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1666                                        CTDB_CONTROL_TCP_ADD,
1667                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1668         if (ret != 0) {
1669                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1670                 return -1;
1671         }
1672
1673         return 0;
1674 }
1675
1676 /*
1677   find a tcp address on a list
1678  */
1679 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1680                                            struct ctdb_tcp_connection *tcp)
1681 {
1682         int i;
1683
1684         if (array == NULL) {
1685                 return NULL;
1686         }
1687
1688         for (i=0;i<array->num;i++) {
1689                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1690                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1691                         return &array->connections[i];
1692                 }
1693         }
1694         return NULL;
1695 }
1696
1697 /*
1698   called by a daemon to inform us of a TCP connection that one of its
1699   clients managing that should tickled with an ACK when IP takeover is
1700   done
1701  */
1702 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1703 {
1704         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1705         struct ctdb_tcp_array *tcparray;
1706         struct ctdb_tcp_connection tcp;
1707         struct ctdb_vnn *vnn;
1708
1709         vnn = find_public_ip_vnn(ctdb, &p->dest);
1710         if (vnn == NULL) {
1711                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1712                         ctdb_addr_to_str(&p->dest)));
1713
1714                 return -1;
1715         }
1716
1717
1718         tcparray = vnn->tcp_array;
1719
1720         /* If this is the first tickle */
1721         if (tcparray == NULL) {
1722                 tcparray = talloc_size(ctdb->nodes, 
1723                         offsetof(struct ctdb_tcp_array, connections) +
1724                         sizeof(struct ctdb_tcp_connection) * 1);
1725                 CTDB_NO_MEMORY(ctdb, tcparray);
1726                 vnn->tcp_array = tcparray;
1727
1728                 tcparray->num = 0;
1729                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1730                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1731
1732                 tcparray->connections[tcparray->num].src_addr = p->src;
1733                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1734                 tcparray->num++;
1735                 return 0;
1736         }
1737
1738
1739         /* Do we already have this tickle ?*/
1740         tcp.src_addr = p->src;
1741         tcp.dst_addr = p->dest;
1742         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1743                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1744                         ctdb_addr_to_str(&tcp.dst_addr),
1745                         ntohs(tcp.dst_addr.ip.sin_port),
1746                         vnn->pnn));
1747                 return 0;
1748         }
1749
1750         /* A new tickle, we must add it to the array */
1751         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1752                                         struct ctdb_tcp_connection,
1753                                         tcparray->num+1);
1754         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1755
1756         vnn->tcp_array = tcparray;
1757         tcparray->connections[tcparray->num].src_addr = p->src;
1758         tcparray->connections[tcparray->num].dst_addr = p->dest;
1759         tcparray->num++;
1760                                 
1761         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1762                 ctdb_addr_to_str(&tcp.dst_addr),
1763                 ntohs(tcp.dst_addr.ip.sin_port),
1764                 vnn->pnn));
1765
1766         return 0;
1767 }
1768
1769
1770 /*
1771   called by a daemon to inform us of a TCP connection that one of its
1772   clients managing that should tickled with an ACK when IP takeover is
1773   done
1774  */
1775 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1776 {
1777         struct ctdb_tcp_connection *tcpp;
1778         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1779
1780         if (vnn == NULL) {
1781                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1782                         ctdb_addr_to_str(&conn->dst_addr)));
1783                 return;
1784         }
1785
1786         /* if the array is empty we cant remove it
1787            and we dont need to do anything
1788          */
1789         if (vnn->tcp_array == NULL) {
1790                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1791                         ctdb_addr_to_str(&conn->dst_addr),
1792                         ntohs(conn->dst_addr.ip.sin_port)));
1793                 return;
1794         }
1795
1796
1797         /* See if we know this connection
1798            if we dont know this connection  then we dont need to do anything
1799          */
1800         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1801         if (tcpp == NULL) {
1802                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1803                         ctdb_addr_to_str(&conn->dst_addr),
1804                         ntohs(conn->dst_addr.ip.sin_port)));
1805                 return;
1806         }
1807
1808
1809         /* We need to remove this entry from the array.
1810            Instead of allocating a new array and copying data to it
1811            we cheat and just copy the last entry in the existing array
1812            to the entry that is to be removed and just shring the 
1813            ->num field
1814          */
1815         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1816         vnn->tcp_array->num--;
1817
1818         /* If we deleted the last entry we also need to remove the entire array
1819          */
1820         if (vnn->tcp_array->num == 0) {
1821                 talloc_free(vnn->tcp_array);
1822                 vnn->tcp_array = NULL;
1823         }               
1824
1825         vnn->tcp_update_needed = true;
1826
1827         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1828                 ctdb_addr_to_str(&conn->src_addr),
1829                 ntohs(conn->src_addr.ip.sin_port)));
1830 }
1831
1832
/*
  called when a daemon restarts.  the intent is to immediately send the
  new node all tickles for the public addresses we are serving, but
  that is not implemented yet: the control currently does nothing and
  always returns 0
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1842
1843
1844 /*
1845   called when a client structure goes away - hook to remove
1846   elements from the tcp_list in all daemons
1847  */
1848 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1849 {
1850         while (client->tcp_list) {
1851                 struct ctdb_tcp_list *tcp = client->tcp_list;
1852                 DLIST_REMOVE(client->tcp_list, tcp);
1853                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1854         }
1855 }
1856
1857
1858 /*
1859   release all IPs on shutdown
1860  */
1861 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1862 {
1863         struct ctdb_vnn *vnn;
1864
1865         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1866                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1867                         ctdb_vnn_unassign_iface(ctdb, vnn);
1868                         continue;
1869                 }
1870                 if (!vnn->iface) {
1871                         continue;
1872                 }
1873                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1874                                   ctdb_vnn_iface_string(vnn),
1875                                   ctdb_addr_to_str(&vnn->public_address),
1876                                   vnn->public_netmask_bits);
1877                 release_kill_clients(ctdb, &vnn->public_address);
1878                 ctdb_vnn_unassign_iface(ctdb, vnn);
1879         }
1880 }
1881
1882
1883 /*
1884   get list of public IPs
1885  */
1886 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1887                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1888 {
1889         int i, num, len;
1890         struct ctdb_all_public_ips *ips;
1891         struct ctdb_vnn *vnn;
1892         bool only_available = false;
1893
1894         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1895                 only_available = true;
1896         }
1897
1898         /* count how many public ip structures we have */
1899         num = 0;
1900         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1901                 num++;
1902         }
1903
1904         len = offsetof(struct ctdb_all_public_ips, ips) + 
1905                 num*sizeof(struct ctdb_public_ip);
1906         ips = talloc_zero_size(outdata, len);
1907         CTDB_NO_MEMORY(ctdb, ips);
1908
1909         i = 0;
1910         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1911                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1912                         continue;
1913                 }
1914                 ips->ips[i].pnn  = vnn->pnn;
1915                 ips->ips[i].addr = vnn->public_address;
1916                 i++;
1917         }
1918         ips->num = i;
1919         len = offsetof(struct ctdb_all_public_ips, ips) +
1920                 i*sizeof(struct ctdb_public_ip);
1921
1922         outdata->dsize = len;
1923         outdata->dptr  = (uint8_t *)ips;
1924
1925         return 0;
1926 }
1927
1928
1929 /*
1930   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1931  */
1932 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1933                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1934 {
1935         int i, num, len;
1936         struct ctdb_all_public_ipsv4 *ips;
1937         struct ctdb_vnn *vnn;
1938
1939         /* count how many public ip structures we have */
1940         num = 0;
1941         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1942                 if (vnn->public_address.sa.sa_family != AF_INET) {
1943                         continue;
1944                 }
1945                 num++;
1946         }
1947
1948         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1949                 num*sizeof(struct ctdb_public_ipv4);
1950         ips = talloc_zero_size(outdata, len);
1951         CTDB_NO_MEMORY(ctdb, ips);
1952
1953         outdata->dsize = len;
1954         outdata->dptr  = (uint8_t *)ips;
1955
1956         ips->num = num;
1957         i = 0;
1958         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1959                 if (vnn->public_address.sa.sa_family != AF_INET) {
1960                         continue;
1961                 }
1962                 ips->ips[i].pnn = vnn->pnn;
1963                 ips->ips[i].sin = vnn->public_address.ip;
1964                 i++;
1965         }
1966
1967         return 0;
1968 }
1969
1970 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1971                                         struct ctdb_req_control *c,
1972                                         TDB_DATA indata,
1973                                         TDB_DATA *outdata)
1974 {
1975         int i, num, len;
1976         ctdb_sock_addr *addr;
1977         struct ctdb_control_public_ip_info *info;
1978         struct ctdb_vnn *vnn;
1979
1980         addr = (ctdb_sock_addr *)indata.dptr;
1981
1982         vnn = find_public_ip_vnn(ctdb, addr);
1983         if (vnn == NULL) {
1984                 /* if it is not a public ip   it could be our 'single ip' */
1985                 if (ctdb->single_ip_vnn) {
1986                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1987                                 vnn = ctdb->single_ip_vnn;
1988                         }
1989                 }
1990         }
1991         if (vnn == NULL) {
1992                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1993                                  "'%s'not a public address\n",
1994                                  ctdb_addr_to_str(addr)));
1995                 return -1;
1996         }
1997
1998         /* count how many public ip structures we have */
1999         num = 0;
2000         for (;vnn->ifaces[num];) {
2001                 num++;
2002         }
2003
2004         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2005                 num*sizeof(struct ctdb_control_iface_info);
2006         info = talloc_zero_size(outdata, len);
2007         CTDB_NO_MEMORY(ctdb, info);
2008
2009         info->ip.addr = vnn->public_address;
2010         info->ip.pnn = vnn->pnn;
2011         info->active_idx = 0xFFFFFFFF;
2012
2013         for (i=0; vnn->ifaces[i]; i++) {
2014                 struct ctdb_iface *cur;
2015
2016                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2017                 if (cur == NULL) {
2018                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2019                                            vnn->ifaces[i]));
2020                         return -1;
2021                 }
2022                 if (vnn->iface == cur) {
2023                         info->active_idx = i;
2024                 }
2025                 strcpy(info->ifaces[i].name, cur->name);
2026                 info->ifaces[i].link_state = cur->link_up;
2027                 info->ifaces[i].references = cur->references;
2028         }
2029         info->num = i;
2030         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2031                 i*sizeof(struct ctdb_control_iface_info);
2032
2033         outdata->dsize = len;
2034         outdata->dptr  = (uint8_t *)info;
2035
2036         return 0;
2037 }
2038
2039 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2040                                 struct ctdb_req_control *c,
2041                                 TDB_DATA *outdata)
2042 {
2043         int i, num, len;
2044         struct ctdb_control_get_ifaces *ifaces;
2045         struct ctdb_iface *cur;
2046
2047         /* count how many public ip structures we have */
2048         num = 0;
2049         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2050                 num++;
2051         }
2052
2053         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2054                 num*sizeof(struct ctdb_control_iface_info);
2055         ifaces = talloc_zero_size(outdata, len);
2056         CTDB_NO_MEMORY(ctdb, ifaces);
2057
2058         i = 0;
2059         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2060                 strcpy(ifaces->ifaces[i].name, cur->name);
2061                 ifaces->ifaces[i].link_state = cur->link_up;
2062                 ifaces->ifaces[i].references = cur->references;
2063                 i++;
2064         }
2065         ifaces->num = i;
2066         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2067                 i*sizeof(struct ctdb_control_iface_info);
2068
2069         outdata->dsize = len;
2070         outdata->dptr  = (uint8_t *)ifaces;
2071
2072         return 0;
2073 }
2074
2075 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2076                                     struct ctdb_req_control *c,
2077                                     TDB_DATA indata)
2078 {
2079         struct ctdb_control_iface_info *info;
2080         struct ctdb_iface *iface;
2081         bool link_up = false;
2082
2083         info = (struct ctdb_control_iface_info *)indata.dptr;
2084
2085         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2086                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2087                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2088                                   len, len, info->name));
2089                 return -1;
2090         }
2091
2092         switch (info->link_state) {
2093         case 0:
2094                 link_up = false;
2095                 break;
2096         case 1:
2097                 link_up = true;
2098                 break;
2099         default:
2100                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2101                                   (unsigned int)info->link_state));
2102                 return -1;
2103         }
2104
2105         if (info->references != 0) {
2106                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2107                                   (unsigned int)info->references));
2108                 return -1;
2109         }
2110
2111         iface = ctdb_find_iface(ctdb, info->name);
2112         if (iface == NULL) {
2113                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2114                                   info->name));
2115                 return -1;
2116         }
2117
2118         if (link_up == iface->link_up) {
2119                 return 0;
2120         }
2121
2122         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2123               ("iface[%s] has changed it's link status %s => %s\n",
2124                iface->name,
2125                iface->link_up?"up":"down",
2126                link_up?"up":"down"));
2127
2128         iface->link_up = link_up;
2129         return 0;
2130 }
2131
2132
/* 
   structure containing the listening socket and the list of tcp connections
   that the ctdb daemon is to kill

   one of these is hung off a vnn (vnn->killtcp) while there are
   connections left to reset for that address
*/
struct ctdb_kill_tcp {
        /* the public address this structure belongs to; the destructor
           clears vnn->killtcp through this pointer */
        struct ctdb_vnn *vnn;
        /* daemon context, used to reach the event context for the retry timer */
        struct ctdb_context *ctdb;
        /* capture socket used to read incoming tcp packets, -1 until opened */
        int capture_fd;
        /* read event registered on capture_fd, NULL until registered */
        struct fd_event *fde;
        /* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
        trbt_tree_t *connections;
        /* opaque state filled in by ctdb_sys_open_capture_socket() and
           passed back to ctdb_sys_read_tcp_packet() */
        void *private_data;
};
2145
/*
  a tcp connection that is to be killed
 */
struct ctdb_killtcp_con {
        /* the two endpoints of the connection, stored in canonical form */
        ctdb_sock_addr src_addr;
        ctdb_sock_addr dst_addr;
        /* number of tickles resent by the periodic timer; the entry is
           dropped once this reaches 5 */
        int count;
        /* the structure whose connection tree this entry lives in */
        struct ctdb_kill_tcp *killtcp;
};
2155
2156 /* this function is used to create a key to represent this socketpair
2157    in the killtcp tree.
2158    this key is used to insert and lookup matching socketpairs that are
2159    to be tickled and RST
2160 */
2161 #define KILLTCP_KEYLEN  10
2162 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2163 {
2164         static uint32_t key[KILLTCP_KEYLEN];
2165
2166         bzero(key, sizeof(key));
2167
2168         if (src->sa.sa_family != dst->sa.sa_family) {
2169                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2170                 return key;
2171         }
2172         
2173         switch (src->sa.sa_family) {
2174         case AF_INET:
2175                 key[0]  = dst->ip.sin_addr.s_addr;
2176                 key[1]  = src->ip.sin_addr.s_addr;
2177                 key[2]  = dst->ip.sin_port;
2178                 key[3]  = src->ip.sin_port;
2179                 break;
2180         case AF_INET6:
2181                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2182                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2183                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2184                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2185                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2186                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2187                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2188                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2189                 key[8]  = dst->ip6.sin6_port;
2190                 key[9]  = src->ip6.sin6_port;
2191                 break;
2192         default:
2193                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2194                 return key;
2195         }
2196
2197         return key;
2198 }
2199
2200 /*
2201   called when we get a read event on the raw socket
2202  */
2203 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2204                                 uint16_t flags, void *private_data)
2205 {
2206         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2207         struct ctdb_killtcp_con *con;
2208         ctdb_sock_addr src, dst;
2209         uint32_t ack_seq, seq;
2210
2211         if (!(flags & EVENT_FD_READ)) {
2212                 return;
2213         }
2214
2215         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2216                                 killtcp->private_data,
2217                                 &src, &dst,
2218                                 &ack_seq, &seq) != 0) {
2219                 /* probably a non-tcp ACK packet */
2220                 return;
2221         }
2222
2223         /* check if we have this guy in our list of connections
2224            to kill
2225         */
2226         con = trbt_lookuparray32(killtcp->connections, 
2227                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2228         if (con == NULL) {
2229                 /* no this was some other packet we can just ignore */
2230                 return;
2231         }
2232
2233         /* This one has been tickled !
2234            now reset him and remove him from the list.
2235          */
2236         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2237                 ntohs(con->dst_addr.ip.sin_port),
2238                 ctdb_addr_to_str(&con->src_addr),
2239                 ntohs(con->src_addr.ip.sin_port)));
2240
2241         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2242         talloc_free(con);
2243 }
2244
2245
2246 /* when traversing the list of all tcp connections to send tickle acks to
2247    (so that we can capture the ack coming back and kill the connection
2248     by a RST)
2249    this callback is called for each connection we are currently trying to kill
2250 */
2251 static void tickle_connection_traverse(void *param, void *data)
2252 {
2253         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2254
2255         /* have tried too many times, just give up */
2256         if (con->count >= 5) {
2257                 talloc_free(con);
2258                 return;
2259         }
2260
2261         /* othervise, try tickling it again */
2262         con->count++;
2263         ctdb_sys_send_tcp(
2264                 (ctdb_sock_addr *)&con->dst_addr,
2265                 (ctdb_sock_addr *)&con->src_addr,
2266                 0, 0, 0);
2267 }
2268
2269
2270 /* 
2271    called every second until all sentenced connections have been reset
2272  */
2273 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2274                                               struct timeval t, void *private_data)
2275 {
2276         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2277
2278
2279         /* loop over all connections sending tickle ACKs */
2280         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2281
2282
2283         /* If there are no more connections to kill we can remove the
2284            entire killtcp structure
2285          */
2286         if ( (killtcp->connections == NULL) || 
2287              (killtcp->connections->root == NULL) ) {
2288                 talloc_free(killtcp);
2289                 return;
2290         }
2291
2292         /* try tickling them again in a seconds time
2293          */
2294         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2295                         ctdb_tickle_sentenced_connections, killtcp);
2296 }
2297
2298 /*
2299   destroy the killtcp structure
2300  */
2301 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2302 {
2303         killtcp->vnn->killtcp = NULL;
2304         return 0;
2305 }
2306
2307
/* insert callback for the killtcp tree: always hand back the new
   connection structure so that it replaces whatever was stored under
   the same key.

   an old structure, if there was one, is deliberately not freed here;
   it is talloc_stolen by the same node in the tree anyway and will be
   deleted when the new data is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        /* the previously stored value is of no interest */
        (void)data;

        return parm;
}
2319
2320 /*
2321   add a tcp socket to the list of connections we want to RST
2322  */
2323 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2324                                        ctdb_sock_addr *s,
2325                                        ctdb_sock_addr *d)
2326 {
2327         ctdb_sock_addr src, dst;
2328         struct ctdb_kill_tcp *killtcp;
2329         struct ctdb_killtcp_con *con;
2330         struct ctdb_vnn *vnn;
2331
2332         ctdb_canonicalize_ip(s, &src);
2333         ctdb_canonicalize_ip(d, &dst);
2334
2335         vnn = find_public_ip_vnn(ctdb, &dst);
2336         if (vnn == NULL) {
2337                 vnn = find_public_ip_vnn(ctdb, &src);
2338         }
2339         if (vnn == NULL) {
2340                 /* if it is not a public ip   it could be our 'single ip' */
2341                 if (ctdb->single_ip_vnn) {
2342                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2343                                 vnn = ctdb->single_ip_vnn;
2344                         }
2345                 }
2346         }
2347         if (vnn == NULL) {
2348                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2349                 return -1;
2350         }
2351
2352         killtcp = vnn->killtcp;
2353         
2354         /* If this is the first connection to kill we must allocate
2355            a new structure
2356          */
2357         if (killtcp == NULL) {
2358                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2359                 CTDB_NO_MEMORY(ctdb, killtcp);
2360
2361                 killtcp->vnn         = vnn;
2362                 killtcp->ctdb        = ctdb;
2363                 killtcp->capture_fd  = -1;
2364                 killtcp->connections = trbt_create(killtcp, 0);
2365
2366                 vnn->killtcp         = killtcp;
2367                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2368         }
2369
2370
2371
2372         /* create a structure that describes this connection we want to
2373            RST and store it in killtcp->connections
2374         */
2375         con = talloc(killtcp, struct ctdb_killtcp_con);
2376         CTDB_NO_MEMORY(ctdb, con);
2377         con->src_addr = src;
2378         con->dst_addr = dst;
2379         con->count    = 0;
2380         con->killtcp  = killtcp;
2381
2382
2383         trbt_insertarray32_callback(killtcp->connections,
2384                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2385                         add_killtcp_callback, con);
2386
2387         /* 
2388            If we dont have a socket to listen on yet we must create it
2389          */
2390         if (killtcp->capture_fd == -1) {
2391                 const char *iface = ctdb_vnn_iface_string(vnn);
2392                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2393                 if (killtcp->capture_fd == -1) {
2394                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2395                                           "socket on iface '%s' for killtcp (%s)\n",
2396                                           iface, strerror(errno)));
2397                         goto failed;
2398                 }
2399         }
2400
2401
2402         if (killtcp->fde == NULL) {
2403                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2404                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
2405                                             capture_tcp_handler, killtcp);
2406
2407                 /* We also need to set up some events to tickle all these connections
2408                    until they are all reset
2409                 */
2410                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2411                                 ctdb_tickle_sentenced_connections, killtcp);
2412         }
2413
2414         /* tickle him once now */
2415         ctdb_sys_send_tcp(
2416                 &con->dst_addr,
2417                 &con->src_addr,
2418                 0, 0, 0);
2419
2420         return 0;
2421
2422 failed:
2423         talloc_free(vnn->killtcp);
2424         vnn->killtcp = NULL;
2425         return -1;
2426 }
2427
2428 /*
2429   kill a TCP connection.
2430  */
2431 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2432 {
2433         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2434
2435         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2436 }
2437
2438 /*
2439   called by a daemon to inform us of the entire list of TCP tickles for
2440   a particular public address.
2441   this control should only be sent by the node that is currently serving
2442   that public address.
2443  */
2444 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2445 {
2446         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2447         struct ctdb_tcp_array *tcparray;
2448         struct ctdb_vnn *vnn;
2449
2450         /* We must at least have tickles.num or else we cant verify the size
2451            of the received data blob
2452          */
2453         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2454                                         tickles.connections)) {
2455                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2456                 return -1;
2457         }
2458
2459         /* verify that the size of data matches what we expect */
2460         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2461                                 tickles.connections)
2462                          + sizeof(struct ctdb_tcp_connection)
2463                                  * list->tickles.num) {
2464                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2465                 return -1;
2466         }       
2467
2468         vnn = find_public_ip_vnn(ctdb, &list->addr);
2469         if (vnn == NULL) {
2470                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2471                         ctdb_addr_to_str(&list->addr)));
2472
2473                 return 1;
2474         }
2475
2476         /* remove any old ticklelist we might have */
2477         talloc_free(vnn->tcp_array);
2478         vnn->tcp_array = NULL;
2479
2480         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2481         CTDB_NO_MEMORY(ctdb, tcparray);
2482
2483         tcparray->num = list->tickles.num;
2484
2485         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2486         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2487
2488         memcpy(tcparray->connections, &list->tickles.connections[0], 
2489                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2490
2491         /* We now have a new fresh tickle list array for this vnn */
2492         vnn->tcp_array = talloc_steal(vnn, tcparray);
2493         
2494         return 0;
2495 }
2496
2497 /*
2498   called to return the full list of tickles for the puclic address associated 
2499   with the provided vnn
2500  */
2501 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2502 {
2503         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2504         struct ctdb_control_tcp_tickle_list *list;
2505         struct ctdb_tcp_array *tcparray;
2506         int num;
2507         struct ctdb_vnn *vnn;
2508
2509         vnn = find_public_ip_vnn(ctdb, addr);
2510         if (vnn == NULL) {
2511                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2512                         ctdb_addr_to_str(addr)));
2513
2514                 return 1;
2515         }
2516
2517         tcparray = vnn->tcp_array;
2518         if (tcparray) {
2519                 num = tcparray->num;
2520         } else {
2521                 num = 0;
2522         }
2523
2524         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2525                                 tickles.connections)
2526                         + sizeof(struct ctdb_tcp_connection) * num;
2527
2528         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2529         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2530         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2531
2532         list->addr = *addr;
2533         list->tickles.num = num;
2534         if (num) {
2535                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2536                         sizeof(struct ctdb_tcp_connection) * num);
2537         }
2538
2539         return 0;
2540 }
2541
2542
2543 /*
2544   set the list of all tcp tickles for a public address
2545  */
2546 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2547                               struct timeval timeout, uint32_t destnode, 
2548                               ctdb_sock_addr *addr,
2549                               struct ctdb_tcp_array *tcparray)
2550 {
2551         int ret, num;
2552         TDB_DATA data;
2553         struct ctdb_control_tcp_tickle_list *list;
2554
2555         if (tcparray) {
2556                 num = tcparray->num;
2557         } else {
2558                 num = 0;
2559         }
2560
2561         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2562                                 tickles.connections) +
2563                         sizeof(struct ctdb_tcp_connection) * num;
2564         data.dptr = talloc_size(ctdb, data.dsize);
2565         CTDB_NO_MEMORY(ctdb, data.dptr);
2566
2567         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2568         list->addr = *addr;
2569         list->tickles.num = num;
2570         if (tcparray) {
2571                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2572         }
2573
2574         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2575                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2576                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2577         if (ret != 0) {
2578                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2579                 return -1;
2580         }
2581
2582         talloc_free(data.dptr);
2583
2584         return ret;
2585 }
2586
2587
2588 /*
2589   perform tickle updates if required
2590  */
2591 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2592                                 struct timed_event *te, 
2593                                 struct timeval t, void *private_data)
2594 {
2595         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2596         int ret;
2597         struct ctdb_vnn *vnn;
2598
2599         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2600                 /* we only send out updates for public addresses that 
2601                    we have taken over
2602                  */
2603                 if (ctdb->pnn != vnn->pnn) {
2604                         continue;
2605                 }
2606                 /* We only send out the updates if we need to */
2607                 if (!vnn->tcp_update_needed) {
2608                         continue;
2609                 }
2610                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2611                                 TAKEOVER_TIMEOUT(),
2612                                 CTDB_BROADCAST_CONNECTED,
2613                                 &vnn->public_address,
2614                                 vnn->tcp_array);
2615                 if (ret != 0) {
2616                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2617                                 ctdb_addr_to_str(&vnn->public_address)));
2618                 }
2619         }
2620
2621         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2622                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2623                              ctdb_update_tcp_tickles, ctdb);
2624 }               
2625         
2626
2627 /*
2628   start periodic update of tcp tickles
2629  */
2630 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2631 {
2632         ctdb->tickle_update_context = talloc_new(ctdb);
2633
2634         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2635                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2636                              ctdb_update_tcp_tickles, ctdb);
2637 }
2638
2639
2640
2641
2642 struct control_gratious_arp {
2643         struct ctdb_context *ctdb;
2644         ctdb_sock_addr addr;
2645         const char *iface;
2646         int count;
2647 };
2648
2649 /*
2650   send a control_gratuitous arp
2651  */
2652 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2653                                   struct timeval t, void *private_data)
2654 {
2655         int ret;
2656         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2657                                                         struct control_gratious_arp);
2658
2659         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2660         if (ret != 0) {
2661                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2662                                  arp->iface, strerror(errno)));
2663         }
2664
2665
2666         arp->count++;
2667         if (arp->count == CTDB_ARP_REPEAT) {
2668                 talloc_free(arp);
2669                 return;
2670         }
2671
2672         event_add_timed(arp->ctdb->ev, arp, 
2673                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2674                         send_gratious_arp, arp);
2675 }
2676
2677
2678 /*
2679   send a gratious arp 
2680  */
2681 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2682 {
2683         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2684         struct control_gratious_arp *arp;
2685
2686         /* verify the size of indata */
2687         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2688                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2689                                  (unsigned)indata.dsize, 
2690                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2691                 return -1;
2692         }
2693         if (indata.dsize != 
2694                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2695                 + gratious_arp->len ) ){
2696
2697                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2698                         "but should be %u bytes\n", 
2699                          (unsigned)indata.dsize, 
2700                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2701                 return -1;
2702         }
2703
2704
2705         arp = talloc(ctdb, struct control_gratious_arp);
2706         CTDB_NO_MEMORY(ctdb, arp);
2707
2708         arp->ctdb  = ctdb;
2709         arp->addr   = gratious_arp->addr;
2710         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2711         CTDB_NO_MEMORY(ctdb, arp->iface);
2712         arp->count = 0;
2713         
2714         event_add_timed(arp->ctdb->ev, arp, 
2715                         timeval_zero(), send_gratious_arp, arp);
2716
2717         return 0;
2718 }
2719
2720 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2721 {
2722         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2723         int ret;
2724
2725         /* verify the size of indata */
2726         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2727                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2728                 return -1;
2729         }
2730         if (indata.dsize != 
2731                 ( offsetof(struct ctdb_control_ip_iface, iface)
2732                 + pub->len ) ){
2733
2734                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2735                         "but should be %u bytes\n", 
2736                          (unsigned)indata.dsize, 
2737                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2738                 return -1;
2739         }
2740
2741         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2742
2743         if (ret != 0) {
2744                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2745                 return -1;
2746         }
2747
2748         return 0;
2749 }
2750
2751 /*
2752   called when releaseip event finishes for del_public_address
2753  */
2754 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2755                                 void *private_data)
2756 {
2757         talloc_free(private_data);
2758 }
2759
2760 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2761 {
2762         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2763         struct ctdb_vnn *vnn;
2764         int ret;
2765
2766         /* verify the size of indata */
2767         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2768                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2769                 return -1;
2770         }
2771         if (indata.dsize != 
2772                 ( offsetof(struct ctdb_control_ip_iface, iface)
2773                 + pub->len ) ){
2774
2775                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2776                         "but should be %u bytes\n", 
2777                          (unsigned)indata.dsize, 
2778                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2779                 return -1;
2780         }
2781
2782         /* walk over all public addresses until we find a match */
2783         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2784                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2785                         TALLOC_CTX *mem_ctx;
2786
2787                         DLIST_REMOVE(ctdb->vnn, vnn);
2788                         if (vnn->iface == NULL) {
2789                                 talloc_free(vnn);
2790                                 return 0;
2791                         }
2792
2793                         mem_ctx = talloc_new(ctdb);
2794                         ret = ctdb_event_script_callback(ctdb, 
2795                                          mem_ctx, delete_ip_callback, mem_ctx,
2796                                          false,
2797                                          CTDB_EVENT_RELEASE_IP,
2798                                          "%s %s %u",
2799                                          ctdb_vnn_iface_string(vnn),
2800                                          ctdb_addr_to_str(&vnn->public_address),
2801                                          vnn->public_netmask_bits);
2802                         ctdb_vnn_unassign_iface(ctdb, vnn);
2803                         talloc_free(vnn);
2804                         if (ret != 0) {
2805                                 return -1;
2806                         }
2807                         return 0;
2808                 }
2809         }
2810
2811         return -1;
2812 }
2813
2814 /* This function is called from the recovery daemon to verify that a remote
2815    node has the expected ip allocation.
2816    This is verified against ctdb->ip_tree
2817 */
2818 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2819 {
2820         struct ctdb_public_ip_list *tmp_ip; 
2821         int i;
2822
2823         if (ctdb->ip_tree == NULL) {
2824                 /* dont know the expected allocation yet, assume remote node
2825                    is correct. */
2826                 return 0;
2827         }
2828
2829         if (ips == NULL) {
2830                 return 0;
2831         }
2832
2833         for (i=0; i<ips->num; i++) {
2834                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2835                 if (tmp_ip == NULL) {
2836                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2837                         return -1;
2838                 }
2839
2840                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2841                         continue;
2842                 }
2843
2844                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2845                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2846                         return -1;
2847                 }
2848         }
2849
2850         return 0;
2851 }
2852
2853 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2854 {
2855         struct ctdb_public_ip_list *tmp_ip; 
2856
2857         if (ctdb->ip_tree == NULL) {
2858                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2859                 return -1;
2860         }
2861
2862         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2863         if (tmp_ip == NULL) {
2864                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2865                 return -1;
2866         }
2867
2868         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2869         tmp_ip->pnn = ip->pnn;
2870
2871         return 0;
2872 }