0d1088023a5b3c2349627c97a4e7be8c349bc898
[tridge/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/*
  Timeout value built from the takeover_timeout tunable.
  NOTE: expands to an expression that dereferences a variable named
  'ctdb', so one must be in scope wherever the macro is used.
 */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument handed to timeval_current_ofs() when the gratuitous
   arp timer is re-armed (presumably seconds - confirm) */
#define CTDB_ARP_INTERVAL 1

/* number of arp/tickle rounds sent per announcement before the
   ctdb_takeover_arp state is freed */
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface, kept on the ctdb->ifaces list
 */
struct ctdb_iface {
        /* list linkage used by the DLIST_* macros */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name; lookups compare it with strcmp() */
        const char *name;
        /* interfaces are only chosen for a public address while this is true */
        bool link_up;
        /* incremented when a vnn is assigned to this interface,
           decremented when it is unassigned */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* control request that is answered once the event script has run */
        struct ctdb_req_control *c;
        /* vnn whose address is being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* control request that is answered once the event script has run */
        struct ctdb_req_control *c;
        /* interface the address was on before the move; restored on failure */
        struct ctdb_iface *old;
        /* vnn whose address is being moved */
        struct ctdb_vnn *vnn;
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
547 /*
548   take over an ip address
549  */
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551                                  struct ctdb_req_control *c,
552                                  TDB_DATA indata,
553                                  bool *async_reply)
554 {
555         int ret;
556         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557         struct ctdb_vnn *vnn;
558         bool have_ip = false;
559         bool do_updateip = false;
560         bool do_takeip = false;
561
562         /* update out vnn list */
563         vnn = find_public_ip_vnn(ctdb, &pip->addr);
564         if (vnn == NULL) {
565                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
566                         ctdb_addr_to_str(&pip->addr)));
567                 return 0;
568         }
569         vnn->pnn = pip->pnn;
570
571         have_ip = ctdb_sys_have_ip(&pip->addr);
572
573         if (vnn->iface) {
574                 if (vnn->iface->link_up) {
575                         struct ctdb_iface *best;
576                         best = ctdb_vnn_best_iface(ctdb, vnn);
577                         /* only move when the rebalance gains something */
578                         if (best && vnn->iface->references > (best->references + 1)) {
579                                 do_updateip = true;
580                         }
581                 } else if (vnn->iface != best_iface) {
582                         do_updateip = true;
583                 }
584         }
585
586         if (!have_ip) {
587                 if (do_updateip) {
588                         ctdb_vnn_unassign_iface(ctdb, vnn);
589                         do_updateip = false;
590                 }
591                 do_takeip = true;
592         }
593
594         if (do_takeip) {
595                 ret = ctdb_do_takeip(ctdb, c, vnn);
596                 if (ret != 0) {
597                         return -1;
598                 }
599         } else if (do_updateip) {
600                 ret = ctdb_do_updateip(ctdb, c, vnn);
601                 if (ret != 0) {
602                         return -1;
603                 }
604         } else {
605                 /*
606                  * The interface is up and the kernel known the ip
607                  * => do nothing
608                  */
609                 return 0;
610         }
611
612         /* tell ctdb_control.c that we will be replying asynchronously */
613         *async_reply = true;
614
615         return 0;
616 }
617
618 /*
619   takeover an ip address old v4 style
620  */
621 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
622                                 struct ctdb_req_control *c,
623                                 TDB_DATA indata, 
624                                 bool *async_reply)
625 {
626         TDB_DATA data;
627         
628         data.dsize = sizeof(struct ctdb_public_ip);
629         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
630         CTDB_NO_MEMORY(ctdb, data.dptr);
631         
632         memcpy(data.dptr, indata.dptr, indata.dsize);
633         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
634 }
635
636 /*
637   kill any clients that are registered with a IP that is being released
638  */
639 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
640 {
641         struct ctdb_client_ip *ip;
642
643         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
644                 ctdb_addr_to_str(addr)));
645
646         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
647                 ctdb_sock_addr tmp_addr;
648
649                 tmp_addr = ip->addr;
650                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
651                         ip->client_id,
652                         ctdb_addr_to_str(&ip->addr)));
653
654                 if (ctdb_same_ip(&tmp_addr, addr)) {
655                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
656                                                                      ip->client_id, 
657                                                                      struct ctdb_client);
658                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
659                                 ip->client_id,
660                                 ctdb_addr_to_str(&ip->addr),
661                                 client->pid));
662
663                         if (client->pid != 0) {
664                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
665                                         (unsigned)client->pid,
666                                         ctdb_addr_to_str(addr),
667                                         ip->client_id));
668                                 kill(client->pid, SIGKILL);
669                         }
670                 }
671         }
672 }
673
674 /*
675   called when releaseip event finishes
676  */
677 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
678                                 void *private_data)
679 {
680         struct takeover_callback_state *state = 
681                 talloc_get_type(private_data, struct takeover_callback_state);
682         TDB_DATA data;
683
684         if (status == -ETIME) {
685                 ctdb_ban_self(ctdb);
686         }
687
688         /* send a message to all clients of this node telling them
689            that the cluster has been reconfigured and they should
690            release any sockets on this IP */
691         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
692         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
693         data.dsize = strlen((char *)data.dptr)+1;
694
695         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
696
697         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
698
699         /* kill clients that have registered with this IP */
700         release_kill_clients(ctdb, state->addr);
701
702         ctdb_vnn_unassign_iface(ctdb, state->vnn);
703
704         /* the control succeeded */
705         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
706         talloc_free(state);
707 }
708
709 /*
710   release an ip address
711  */
712 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
713                                 struct ctdb_req_control *c,
714                                 TDB_DATA indata, 
715                                 bool *async_reply)
716 {
717         int ret;
718         struct takeover_callback_state *state;
719         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
720         struct ctdb_vnn *vnn;
721
722         /* update our vnn list */
723         vnn = find_public_ip_vnn(ctdb, &pip->addr);
724         if (vnn == NULL) {
725                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
726                         ctdb_addr_to_str(&pip->addr)));
727                 return 0;
728         }
729         vnn->pnn = pip->pnn;
730
731         /* stop any previous arps */
732         talloc_free(vnn->takeover_ctx);
733         vnn->takeover_ctx = NULL;
734
735         if (!ctdb_sys_have_ip(&pip->addr)) {
736                 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
737                         ctdb_addr_to_str(&pip->addr),
738                         vnn->public_netmask_bits, 
739                         ctdb_vnn_iface_string(vnn)));
740                 ctdb_vnn_unassign_iface(ctdb, vnn);
741                 return 0;
742         }
743
744         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
745                 ctdb_addr_to_str(&pip->addr),
746                 vnn->public_netmask_bits, 
747                 ctdb_vnn_iface_string(vnn),
748                 pip->pnn));
749
750         state = talloc(ctdb, struct takeover_callback_state);
751         CTDB_NO_MEMORY(ctdb, state);
752
753         state->c = talloc_steal(state, c);
754         state->addr = talloc(state, ctdb_sock_addr);       
755         CTDB_NO_MEMORY(ctdb, state->addr);
756         *state->addr = pip->addr;
757         state->vnn   = vnn;
758
759         ret = ctdb_event_script_callback(ctdb, 
760                                          state, release_ip_callback, state,
761                                          false,
762                                          CTDB_EVENT_RELEASE_IP,
763                                          "%s %s %u",
764                                          ctdb_vnn_iface_string(vnn),
765                                          ctdb_addr_to_str(&pip->addr),
766                                          vnn->public_netmask_bits);
767         if (ret != 0) {
768                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
769                         ctdb_addr_to_str(&pip->addr),
770                         ctdb_vnn_iface_string(vnn)));
771                 talloc_free(state);
772                 return -1;
773         }
774
775         /* tell the control that we will be reply asynchronously */
776         *async_reply = true;
777         return 0;
778 }
779
780 /*
781   release an ip address old v4 style
782  */
783 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
784                                 struct ctdb_req_control *c,
785                                 TDB_DATA indata, 
786                                 bool *async_reply)
787 {
788         TDB_DATA data;
789         
790         data.dsize = sizeof(struct ctdb_public_ip);
791         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
792         CTDB_NO_MEMORY(ctdb, data.dptr);
793         
794         memcpy(data.dptr, indata.dptr, indata.dsize);
795         return ctdb_control_release_ip(ctdb, c, data, async_reply);
796 }
797
798
799 static int ctdb_add_public_address(struct ctdb_context *ctdb,
800                                    ctdb_sock_addr *addr,
801                                    unsigned mask, const char *ifaces)
802 {
803         struct ctdb_vnn      *vnn;
804         uint32_t num = 0;
805         char *tmp;
806         const char *iface;
807         int i;
808         int ret;
809
810         /* Verify that we dont have an entry for this ip yet */
811         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
812                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
813                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
814                                 ctdb_addr_to_str(addr)));
815                         return -1;
816                 }               
817         }
818
819         /* create a new vnn structure for this ip address */
820         vnn = talloc_zero(ctdb, struct ctdb_vnn);
821         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
822         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
823         tmp = talloc_strdup(vnn, ifaces);
824         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
825         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
826                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
827                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
828                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
829                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
830                 num++;
831         }
832         talloc_free(tmp);
833         vnn->ifaces[num] = NULL;
834         vnn->public_address      = *addr;
835         vnn->public_netmask_bits = mask;
836         vnn->pnn                 = -1;
837
838         for (i=0; vnn->ifaces[i]; i++) {
839                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
840                 if (ret != 0) {
841                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
842                                            "for public_address[%s]\n",
843                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
844                         talloc_free(vnn);
845                         return -1;
846                 }
847         }
848
849         DLIST_ADD(ctdb->vnn, vnn);
850
851         return 0;
852 }
853
854 /*
855   setup the event script directory
856 */
857 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
858 {
859         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
860         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
861         return 0;
862 }
863
864 /*
865   setup the public address lists from a file
866 */
867 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
868 {
869         char **lines;
870         int nlines;
871         int i;
872
873         lines = file_lines_load(alist, &nlines, ctdb);
874         if (lines == NULL) {
875                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
876                 return -1;
877         }
878         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
879                 nlines--;
880         }
881
882         for (i=0;i<nlines;i++) {
883                 unsigned mask;
884                 ctdb_sock_addr addr;
885                 const char *addrstr;
886                 const char *ifaces;
887                 char *tok, *line;
888
889                 line = lines[i];
890                 while ((*line == ' ') || (*line == '\t')) {
891                         line++;
892                 }
893                 if (*line == '#') {
894                         continue;
895                 }
896                 if (strcmp(line, "") == 0) {
897                         continue;
898                 }
899                 tok = strtok(line, " \t");
900                 addrstr = tok;
901                 tok = strtok(NULL, " \t");
902                 if (tok == NULL) {
903                         if (NULL == ctdb->default_public_interface) {
904                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
905                                          i+1));
906                                 talloc_free(lines);
907                                 return -1;
908                         }
909                         ifaces = ctdb->default_public_interface;
910                 } else {
911                         ifaces = tok;
912                 }
913
914                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
915                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
916                         talloc_free(lines);
917                         return -1;
918                 }
919                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
920                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
921                         talloc_free(lines);
922                         return -1;
923                 }
924         }
925
926         talloc_free(lines);
927         return 0;
928 }
929
930 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
931                               const char *iface,
932                               const char *ip)
933 {
934         struct ctdb_vnn *svnn;
935         bool ok;
936         int ret;
937
938         svnn = talloc_zero(ctdb, struct ctdb_vnn);
939         CTDB_NO_MEMORY(ctdb, svnn);
940
941         svnn->ifaces = talloc_array(svnn, const char *, 2);
942         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
943         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
944         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
945         svnn->ifaces[1] = NULL;
946
947         ok = parse_ip(ip, iface, 0, &svnn->public_address);
948         if (!ok) {
949                 talloc_free(svnn);
950                 return -1;
951         }
952
953         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
954         if (ret != 0) {
955                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
956                                    "for single_ip[%s]\n",
957                                    svnn->ifaces[0],
958                                    ctdb_addr_to_str(&svnn->public_address)));
959                 talloc_free(svnn);
960                 return -1;
961         }
962
963         ret = ctdb_vnn_assign_iface(ctdb, svnn);
964         if (ret != 0) {
965                 talloc_free(svnn);
966                 return -1;
967         }
968
969         ctdb->single_ip_vnn = svnn;
970         return 0;
971 }
972
973 struct ctdb_public_ip_list {
974         struct ctdb_public_ip_list *next;
975         uint32_t pnn;
976         ctdb_sock_addr addr;
977 };
978
979
980 /* Given a physical node, return the number of
981    public addresses that is currently assigned to this node.
982 */
983 static int node_ip_coverage(struct ctdb_context *ctdb, 
984         int32_t pnn,
985         struct ctdb_public_ip_list *ips)
986 {
987         int num=0;
988
989         for (;ips;ips=ips->next) {
990                 if (ips->pnn == pnn) {
991                         num++;
992                 }
993         }
994         return num;
995 }
996
997
998 /* Check if this is a public ip known to the node, i.e. can that
999    node takeover this ip ?
1000 */
1001 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1002                 struct ctdb_public_ip_list *ip)
1003 {
1004         struct ctdb_all_public_ips *public_ips;
1005         int i;
1006
1007         public_ips = ctdb->nodes[pnn]->available_public_ips;
1008
1009         if (public_ips == NULL) {
1010                 return -1;
1011         }
1012
1013         for (i=0;i<public_ips->num;i++) {
1014                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1015                         /* yes, this node can serve this public ip */
1016                         return 0;
1017                 }
1018         }
1019
1020         return -1;
1021 }
1022
1023
1024 /* search the node lists list for a node to takeover this ip.
1025    pick the node that currently are serving the least number of ips
1026    so that the ips get spread out evenly.
1027 */
1028 static int find_takeover_node(struct ctdb_context *ctdb, 
1029                 struct ctdb_node_map *nodemap, uint32_t mask, 
1030                 struct ctdb_public_ip_list *ip,
1031                 struct ctdb_public_ip_list *all_ips)
1032 {
1033         int pnn, min=0, num;
1034         int i;
1035
1036         pnn    = -1;
1037         for (i=0;i<nodemap->num;i++) {
1038                 if (nodemap->nodes[i].flags & mask) {
1039                         /* This node is not healty and can not be used to serve
1040                            a public address 
1041                         */
1042                         continue;
1043                 }
1044
1045                 /* verify that this node can serve this ip */
1046                 if (can_node_serve_ip(ctdb, i, ip)) {
1047                         /* no it couldnt   so skip to the next node */
1048                         continue;
1049                 }
1050
1051                 num = node_ip_coverage(ctdb, i, all_ips);
1052                 /* was this the first node we checked ? */
1053                 if (pnn == -1) {
1054                         pnn = i;
1055                         min  = num;
1056                 } else {
1057                         if (num < min) {
1058                                 pnn = i;
1059                                 min  = num;
1060                         }
1061                 }
1062         }       
1063         if (pnn == -1) {
1064                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1065                         ctdb_addr_to_str(&ip->addr)));
1066
1067                 return -1;
1068         }
1069
1070         ip->pnn = pnn;
1071         return 0;
1072 }
1073
1074 #define IP_KEYLEN       4
1075 static uint32_t *ip_key(ctdb_sock_addr *ip)
1076 {
1077         static uint32_t key[IP_KEYLEN];
1078
1079         bzero(key, sizeof(key));
1080
1081         switch (ip->sa.sa_family) {
1082         case AF_INET:
1083                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1084                 break;
1085         case AF_INET6:
1086                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1087                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1088                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1089                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1090                 break;
1091         default:
1092                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1093                 return key;
1094         }
1095
1096         return key;
1097 }
1098
/* Insert callback used when building the merged ip tree.

   Always hands back 'parm' and ignores 'data'.
   NOTE(review): presumably 'data' is the entry already stored under
   the key and the return value replaces it - confirm against
   trbt_insertarray32_callback().
*/
static void *add_ip_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
1103
1104 void getips_count_callback(void *param, void *data)
1105 {
1106         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1107         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1108
1109         new_ip->next = *ip_list;
1110         *ip_list     = new_ip;
1111 }
1112
1113 struct ctdb_public_ip_list *
1114 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
1115 {
1116         int i, j;
1117         struct ctdb_public_ip_list *ip_list;
1118         struct ctdb_all_public_ips *public_ips;
1119         trbt_tree_t *ip_tree;
1120
1121         ip_tree = trbt_create(tmp_ctx, 0);
1122
1123         for (i=0;i<ctdb->num_nodes;i++) {
1124                 public_ips = ctdb->nodes[i]->known_public_ips;
1125
1126                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1127                         continue;
1128                 }
1129
1130                 /* there were no public ips for this node */
1131                 if (public_ips == NULL) {
1132                         continue;
1133                 }               
1134
1135                 for (j=0;j<public_ips->num;j++) {
1136                         struct ctdb_public_ip_list *tmp_ip; 
1137
1138                         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
1139                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1140                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1141                         tmp_ip->addr = public_ips->ips[j].addr;
1142                         tmp_ip->next = NULL;
1143
1144                         trbt_insertarray32_callback(ip_tree,
1145                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1146                                 add_ip_callback,
1147                                 tmp_ip);
1148                 }
1149         }
1150
1151         ip_list = NULL;
1152         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1153
1154         return ip_list;
1155 }
1156
1157 /*
1158   make any IP alias changes for public addresses that are necessary 
1159  */
1160 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1161 {
1162         int i, num_healthy, retries;
1163         struct ctdb_public_ip ip;
1164         struct ctdb_public_ipv4 ipv4;
1165         uint32_t mask;
1166         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1167         int maxnode, maxnum=0, minnode, minnum=0, num;
1168         TDB_DATA data;
1169         struct timeval timeout;
1170         struct client_async_data *async_data;
1171         struct ctdb_client_control_state *state;
1172         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1173
1174
1175         ZERO_STRUCT(ip);
1176
1177         /* Count how many completely healthy nodes we have */
1178         num_healthy = 0;
1179         for (i=0;i<nodemap->num;i++) {
1180                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1181                         num_healthy++;
1182                 }
1183         }
1184
1185         if (num_healthy > 0) {
1186                 /* We have healthy nodes, so only consider them for 
1187                    serving public addresses
1188                 */
1189                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1190         } else {
1191                 /* We didnt have any completely healthy nodes so
1192                    use "disabled" nodes as a fallback
1193                 */
1194                 mask = NODE_FLAGS_INACTIVE;
1195         }
1196
1197         /* since nodes only know about those public addresses that
1198            can be served by that particular node, no single node has
1199            a full list of all public addresses that exist in the cluster.
1200            Walk over all node structures and create a merged list of
1201            all public addresses that exist in the cluster.
1202         */
1203         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
1204
1205         /* If we want deterministic ip allocations, i.e. that the ip addresses
1206            will always be allocated the same way for a specific set of
1207            available/unavailable nodes.
1208         */
1209         if (1 == ctdb->tunable.deterministic_public_ips) {              
1210                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1211                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1212                         tmp_ip->pnn = i%nodemap->num;
1213                 }
1214         }
1215
1216
1217         /* mark all public addresses with a masked node as being served by
1218            node -1
1219         */
1220         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1221                 if (tmp_ip->pnn == -1) {
1222                         continue;
1223                 }
1224                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1225                         tmp_ip->pnn = -1;
1226                 }
1227         }
1228
1229         /* verify that the assigned nodes can serve that public ip
1230            and set it to -1 if not
1231         */
1232         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1233                 if (tmp_ip->pnn == -1) {
1234                         continue;
1235                 }
1236                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1237                         /* this node can not serve this ip. */
1238                         tmp_ip->pnn = -1;
1239                 }
1240         }
1241
1242
1243         /* now we must redistribute all public addresses with takeover node
1244            -1 among the nodes available
1245         */
1246         retries = 0;
1247 try_again:
1248         /* loop over all ip's and find a physical node to cover for 
1249            each unassigned ip.
1250         */
1251         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1252                 if (tmp_ip->pnn == -1) {
1253                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1254                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1255                                         ctdb_addr_to_str(&tmp_ip->addr)));
1256                         }
1257                 }
1258         }
1259
1260         /* If we dont want ips to fail back after a node becomes healthy
1261            again, we wont even try to reallocat the ip addresses so that
1262            they are evenly spread out.
1263            This can NOT be used at the same time as DeterministicIPs !
1264         */
1265         if (1 == ctdb->tunable.no_ip_failback) {
1266                 if (1 == ctdb->tunable.deterministic_public_ips) {
1267                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1268                 }
1269                 goto finished;
1270         }
1271
1272
1273         /* now, try to make sure the ip adresses are evenly distributed
1274            across the node.
1275            for each ip address, loop over all nodes that can serve this
1276            ip and make sure that the difference between the node
1277            serving the most and the node serving the least ip's are not greater
1278            than 1.
1279         */
1280         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1281                 if (tmp_ip->pnn == -1) {
1282                         continue;
1283                 }
1284
1285                 /* Get the highest and lowest number of ips's served by any 
1286                    valid node which can serve this ip.
1287                 */
1288                 maxnode = -1;
1289                 minnode = -1;
1290                 for (i=0;i<nodemap->num;i++) {
1291                         if (nodemap->nodes[i].flags & mask) {
1292                                 continue;
1293                         }
1294
1295                         /* only check nodes that can actually serve this ip */
1296                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1297                                 /* no it couldnt   so skip to the next node */
1298                                 continue;
1299                         }
1300
1301                         num = node_ip_coverage(ctdb, i, all_ips);
1302                         if (maxnode == -1) {
1303                                 maxnode = i;
1304                                 maxnum  = num;
1305                         } else {
1306                                 if (num > maxnum) {
1307                                         maxnode = i;
1308                                         maxnum  = num;
1309                                 }
1310                         }
1311                         if (minnode == -1) {
1312                                 minnode = i;
1313                                 minnum  = num;
1314                         } else {
1315                                 if (num < minnum) {
1316                                         minnode = i;
1317                                         minnum  = num;
1318                                 }
1319                         }
1320                 }
1321                 if (maxnode == -1) {
1322                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1323                                 ctdb_addr_to_str(&tmp_ip->addr)));
1324
1325                         continue;
1326                 }
1327
1328                 /* If we want deterministic IPs then dont try to reallocate 
1329                    them to spread out the load.
1330                 */
1331                 if (1 == ctdb->tunable.deterministic_public_ips) {
1332                         continue;
1333                 }
1334
1335                 /* if the spread between the smallest and largest coverage by
1336                    a node is >=2 we steal one of the ips from the node with
1337                    most coverage to even things out a bit.
1338                    try to do this at most 5 times  since we dont want to spend
1339                    too much time balancing the ip coverage.
1340                 */
1341                 if ( (maxnum > minnum+1)
1342                   && (retries < 5) ){
1343                         struct ctdb_public_ip_list *tmp;
1344
1345                         /* mark one of maxnode's vnn's as unassigned and try
1346                            again
1347                         */
1348                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1349                                 if (tmp->pnn == maxnode) {
1350                                         tmp->pnn = -1;
1351                                         retries++;
1352                                         goto try_again;
1353                                 }
1354                         }
1355                 }
1356         }
1357
1358
1359         /* finished distributing the public addresses, now just send the 
1360            info out to the nodes
1361         */
1362 finished:
1363
1364         /* at this point ->pnn is the node which will own each IP
1365            or -1 if there is no node that can cover this ip
1366         */
1367
1368         /* now tell all nodes to delete any alias that they should not
1369            have.  This will be a NOOP on nodes that don't currently
1370            hold the given alias */
1371         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1372         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1373
1374         for (i=0;i<nodemap->num;i++) {
1375                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1376                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1377                         continue;
1378                 }
1379
1380                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1381                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1382                                 /* This node should be serving this
1383                                    vnn so dont tell it to release the ip
1384                                 */
1385                                 continue;
1386                         }
1387                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1388                                 ipv4.pnn = tmp_ip->pnn;
1389                                 ipv4.sin = tmp_ip->addr.ip;
1390
1391                                 timeout = TAKEOVER_TIMEOUT();
1392                                 data.dsize = sizeof(ipv4);
1393                                 data.dptr  = (uint8_t *)&ipv4;
1394                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1395                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1396                                                 data, async_data,
1397                                                 &timeout, NULL);
1398                         } else {
1399                                 ip.pnn  = tmp_ip->pnn;
1400                                 ip.addr = tmp_ip->addr;
1401
1402                                 timeout = TAKEOVER_TIMEOUT();
1403                                 data.dsize = sizeof(ip);
1404                                 data.dptr  = (uint8_t *)&ip;
1405                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1406                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1407                                                 data, async_data,
1408                                                 &timeout, NULL);
1409                         }
1410
1411                         if (state == NULL) {
1412                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1413                                 talloc_free(tmp_ctx);
1414                                 return -1;
1415                         }
1416                 
1417                         ctdb_client_async_add(async_data, state);
1418                 }
1419         }
1420         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1421                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1422                 talloc_free(tmp_ctx);
1423                 return -1;
1424         }
1425         talloc_free(async_data);
1426
1427
1428         /* tell all nodes to get their own IPs */
1429         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1430         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1431         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1432                 if (tmp_ip->pnn == -1) {
1433                         /* this IP won't be taken over */
1434                         continue;
1435                 }
1436
1437                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1438                         ipv4.pnn = tmp_ip->pnn;
1439                         ipv4.sin = tmp_ip->addr.ip;
1440
1441                         timeout = TAKEOVER_TIMEOUT();
1442                         data.dsize = sizeof(ipv4);
1443                         data.dptr  = (uint8_t *)&ipv4;
1444                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1445                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1446                                         data, async_data,
1447                                         &timeout, NULL);
1448                 } else {
1449                         ip.pnn  = tmp_ip->pnn;
1450                         ip.addr = tmp_ip->addr;
1451
1452                         timeout = TAKEOVER_TIMEOUT();
1453                         data.dsize = sizeof(ip);
1454                         data.dptr  = (uint8_t *)&ip;
1455                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1456                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1457                                         data, async_data,
1458                                         &timeout, NULL);
1459                 }
1460                 if (state == NULL) {
1461                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1462                         talloc_free(tmp_ctx);
1463                         return -1;
1464                 }
1465                 
1466                 ctdb_client_async_add(async_data, state);
1467         }
1468         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1469                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1470                 talloc_free(tmp_ctx);
1471                 return -1;
1472         }
1473
1474         talloc_free(tmp_ctx);
1475         return 0;
1476 }
1477
1478
1479 /*
1480   destroy a ctdb_client_ip structure
1481  */
1482 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1483 {
1484         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1485                 ctdb_addr_to_str(&ip->addr),
1486                 ntohs(ip->addr.ip.sin_port),
1487                 ip->client_id));
1488
1489         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1490         return 0;
1491 }
1492
1493 /*
1494   called by a client to inform us of a TCP connection that it is managing
1495   that should tickled with an ACK when IP takeover is done
1496   we handle both the old ipv4 style of packets as well as the new ipv4/6
1497   pdus.
1498  */
1499 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1500                                 TDB_DATA indata)
1501 {
1502         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1503         struct ctdb_control_tcp *old_addr = NULL;
1504         struct ctdb_control_tcp_addr new_addr;
1505         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1506         struct ctdb_tcp_list *tcp;
1507         struct ctdb_control_tcp_vnn t;
1508         int ret;
1509         TDB_DATA data;
1510         struct ctdb_client_ip *ip;
1511         struct ctdb_vnn *vnn;
1512         ctdb_sock_addr addr;
1513
1514         switch (indata.dsize) {
1515         case sizeof(struct ctdb_control_tcp):
1516                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1517                 ZERO_STRUCT(new_addr);
1518                 tcp_sock = &new_addr;
1519                 tcp_sock->src.ip  = old_addr->src;
1520                 tcp_sock->dest.ip = old_addr->dest;
1521                 break;
1522         case sizeof(struct ctdb_control_tcp_addr):
1523                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1524                 break;
1525         default:
1526                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1527                                  "to ctdb_control_tcp_client. size was %d but "
1528                                  "only allowed sizes are %lu and %lu\n",
1529                                  (int)indata.dsize,
1530                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1531                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1532                 return -1;
1533         }
1534
1535         addr = tcp_sock->src;
1536         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1537         addr = tcp_sock->dest;
1538         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1539
1540         ZERO_STRUCT(addr);
1541         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1542         vnn = find_public_ip_vnn(ctdb, &addr);
1543         if (vnn == NULL) {
1544                 switch (addr.sa.sa_family) {
1545                 case AF_INET:
1546                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1547                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1548                                         ctdb_addr_to_str(&addr)));
1549                         }
1550                         break;
1551                 case AF_INET6:
1552                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1553                                 ctdb_addr_to_str(&addr)));
1554                         break;
1555                 default:
1556                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1557                 }
1558
1559                 return 0;
1560         }
1561
1562         if (vnn->pnn != ctdb->pnn) {
1563                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1564                         ctdb_addr_to_str(&addr),
1565                         client_id, client->pid));
1566                 /* failing this call will tell smbd to die */
1567                 return -1;
1568         }
1569
1570         ip = talloc(client, struct ctdb_client_ip);
1571         CTDB_NO_MEMORY(ctdb, ip);
1572
1573         ip->ctdb      = ctdb;
1574         ip->addr      = addr;
1575         ip->client_id = client_id;
1576         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1577         DLIST_ADD(ctdb->client_ip_list, ip);
1578
1579         tcp = talloc(client, struct ctdb_tcp_list);
1580         CTDB_NO_MEMORY(ctdb, tcp);
1581
1582         tcp->connection.src_addr = tcp_sock->src;
1583         tcp->connection.dst_addr = tcp_sock->dest;
1584
1585         DLIST_ADD(client->tcp_list, tcp);
1586
1587         t.src  = tcp_sock->src;
1588         t.dest = tcp_sock->dest;
1589
1590         data.dptr = (uint8_t *)&t;
1591         data.dsize = sizeof(t);
1592
1593         switch (addr.sa.sa_family) {
1594         case AF_INET:
1595                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1596                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1597                         ctdb_addr_to_str(&tcp_sock->src),
1598                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1599                 break;
1600         case AF_INET6:
1601                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1602                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1603                         ctdb_addr_to_str(&tcp_sock->src),
1604                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1605                 break;
1606         default:
1607                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1608         }
1609
1610
1611         /* tell all nodes about this tcp connection */
1612         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1613                                        CTDB_CONTROL_TCP_ADD,
1614                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1615         if (ret != 0) {
1616                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1617                 return -1;
1618         }
1619
1620         return 0;
1621 }
1622
1623 /*
1624   find a tcp address on a list
1625  */
1626 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1627                                            struct ctdb_tcp_connection *tcp)
1628 {
1629         int i;
1630
1631         if (array == NULL) {
1632                 return NULL;
1633         }
1634
1635         for (i=0;i<array->num;i++) {
1636                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1637                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1638                         return &array->connections[i];
1639                 }
1640         }
1641         return NULL;
1642 }
1643
1644 /*
1645   called by a daemon to inform us of a TCP connection that one of its
1646   clients managing that should tickled with an ACK when IP takeover is
1647   done
1648  */
1649 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1650 {
1651         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1652         struct ctdb_tcp_array *tcparray;
1653         struct ctdb_tcp_connection tcp;
1654         struct ctdb_vnn *vnn;
1655
1656         vnn = find_public_ip_vnn(ctdb, &p->dest);
1657         if (vnn == NULL) {
1658                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1659                         ctdb_addr_to_str(&p->dest)));
1660
1661                 return -1;
1662         }
1663
1664
1665         tcparray = vnn->tcp_array;
1666
1667         /* If this is the first tickle */
1668         if (tcparray == NULL) {
1669                 tcparray = talloc_size(ctdb->nodes, 
1670                         offsetof(struct ctdb_tcp_array, connections) +
1671                         sizeof(struct ctdb_tcp_connection) * 1);
1672                 CTDB_NO_MEMORY(ctdb, tcparray);
1673                 vnn->tcp_array = tcparray;
1674
1675                 tcparray->num = 0;
1676                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1677                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1678
1679                 tcparray->connections[tcparray->num].src_addr = p->src;
1680                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1681                 tcparray->num++;
1682                 return 0;
1683         }
1684
1685
1686         /* Do we already have this tickle ?*/
1687         tcp.src_addr = p->src;
1688         tcp.dst_addr = p->dest;
1689         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1690                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1691                         ctdb_addr_to_str(&tcp.dst_addr),
1692                         ntohs(tcp.dst_addr.ip.sin_port),
1693                         vnn->pnn));
1694                 return 0;
1695         }
1696
1697         /* A new tickle, we must add it to the array */
1698         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1699                                         struct ctdb_tcp_connection,
1700                                         tcparray->num+1);
1701         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1702
1703         vnn->tcp_array = tcparray;
1704         tcparray->connections[tcparray->num].src_addr = p->src;
1705         tcparray->connections[tcparray->num].dst_addr = p->dest;
1706         tcparray->num++;
1707                                 
1708         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1709                 ctdb_addr_to_str(&tcp.dst_addr),
1710                 ntohs(tcp.dst_addr.ip.sin_port),
1711                 vnn->pnn));
1712
1713         return 0;
1714 }
1715
1716
1717 /*
1718   called by a daemon to inform us of a TCP connection that one of its
1719   clients managing that should tickled with an ACK when IP takeover is
1720   done
1721  */
1722 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1723 {
1724         struct ctdb_tcp_connection *tcpp;
1725         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1726
1727         if (vnn == NULL) {
1728                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1729                         ctdb_addr_to_str(&conn->dst_addr)));
1730                 return;
1731         }
1732
1733         /* if the array is empty we cant remove it
1734            and we dont need to do anything
1735          */
1736         if (vnn->tcp_array == NULL) {
1737                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1738                         ctdb_addr_to_str(&conn->dst_addr),
1739                         ntohs(conn->dst_addr.ip.sin_port)));
1740                 return;
1741         }
1742
1743
1744         /* See if we know this connection
1745            if we dont know this connection  then we dont need to do anything
1746          */
1747         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1748         if (tcpp == NULL) {
1749                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1750                         ctdb_addr_to_str(&conn->dst_addr),
1751                         ntohs(conn->dst_addr.ip.sin_port)));
1752                 return;
1753         }
1754
1755
1756         /* We need to remove this entry from the array.
1757            Instead of allocating a new array and copying data to it
1758            we cheat and just copy the last entry in the existing array
1759            to the entry that is to be removed and just shring the 
1760            ->num field
1761          */
1762         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1763         vnn->tcp_array->num--;
1764
1765         /* If we deleted the last entry we also need to remove the entire array
1766          */
1767         if (vnn->tcp_array->num == 0) {
1768                 talloc_free(vnn->tcp_array);
1769                 vnn->tcp_array = NULL;
1770         }               
1771
1772         vnn->tcp_update_needed = true;
1773
1774         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1775                 ctdb_addr_to_str(&conn->src_addr),
1776                 ntohs(conn->src_addr.ip.sin_port)));
1777 }
1778
1779
/*
  called when a daemon restarts - meant to send all tickles for all
  public addresses we are serving immediately to the new node.

  Not implemented yet: both arguments are ignored and 0 is returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1789
1790
1791 /*
1792   called when a client structure goes away - hook to remove
1793   elements from the tcp_list in all daemons
1794  */
1795 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1796 {
1797         while (client->tcp_list) {
1798                 struct ctdb_tcp_list *tcp = client->tcp_list;
1799                 DLIST_REMOVE(client->tcp_list, tcp);
1800                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1801         }
1802 }
1803
1804
1805 /*
1806   release all IPs on shutdown
1807  */
1808 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1809 {
1810         struct ctdb_vnn *vnn;
1811
1812         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1813                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1814                         ctdb_vnn_unassign_iface(ctdb, vnn);
1815                         continue;
1816                 }
1817                 if (!vnn->iface) {
1818                         continue;
1819                 }
1820                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1821                                   ctdb_vnn_iface_string(vnn),
1822                                   ctdb_addr_to_str(&vnn->public_address),
1823                                   vnn->public_netmask_bits);
1824                 release_kill_clients(ctdb, &vnn->public_address);
1825                 ctdb_vnn_unassign_iface(ctdb, vnn);
1826         }
1827 }
1828
1829
1830 /*
1831   get list of public IPs
1832  */
1833 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1834                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1835 {
1836         int i, num, len;
1837         struct ctdb_all_public_ips *ips;
1838         struct ctdb_vnn *vnn;
1839         bool only_available = false;
1840
1841         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1842                 only_available = true;
1843         }
1844
1845         /* count how many public ip structures we have */
1846         num = 0;
1847         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1848                 num++;
1849         }
1850
1851         len = offsetof(struct ctdb_all_public_ips, ips) + 
1852                 num*sizeof(struct ctdb_public_ip);
1853         ips = talloc_zero_size(outdata, len);
1854         CTDB_NO_MEMORY(ctdb, ips);
1855
1856         i = 0;
1857         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1858                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1859                         continue;
1860                 }
1861                 ips->ips[i].pnn  = vnn->pnn;
1862                 ips->ips[i].addr = vnn->public_address;
1863                 i++;
1864         }
1865         ips->num = i;
1866         len = offsetof(struct ctdb_all_public_ips, ips) +
1867                 i*sizeof(struct ctdb_public_ip);
1868
1869         outdata->dsize = len;
1870         outdata->dptr  = (uint8_t *)ips;
1871
1872         return 0;
1873 }
1874
1875
1876 /*
1877   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1878  */
1879 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1880                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1881 {
1882         int i, num, len;
1883         struct ctdb_all_public_ipsv4 *ips;
1884         struct ctdb_vnn *vnn;
1885
1886         /* count how many public ip structures we have */
1887         num = 0;
1888         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1889                 if (vnn->public_address.sa.sa_family != AF_INET) {
1890                         continue;
1891                 }
1892                 num++;
1893         }
1894
1895         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1896                 num*sizeof(struct ctdb_public_ipv4);
1897         ips = talloc_zero_size(outdata, len);
1898         CTDB_NO_MEMORY(ctdb, ips);
1899
1900         outdata->dsize = len;
1901         outdata->dptr  = (uint8_t *)ips;
1902
1903         ips->num = num;
1904         i = 0;
1905         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1906                 if (vnn->public_address.sa.sa_family != AF_INET) {
1907                         continue;
1908                 }
1909                 ips->ips[i].pnn = vnn->pnn;
1910                 ips->ips[i].sin = vnn->public_address.ip;
1911                 i++;
1912         }
1913
1914         return 0;
1915 }
1916
1917 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1918                                         struct ctdb_req_control *c,
1919                                         TDB_DATA indata,
1920                                         TDB_DATA *outdata)
1921 {
1922         int i, num, len;
1923         ctdb_sock_addr *addr;
1924         struct ctdb_control_public_ip_info *info;
1925         struct ctdb_vnn *vnn;
1926
1927         addr = (ctdb_sock_addr *)indata.dptr;
1928
1929         vnn = find_public_ip_vnn(ctdb, addr);
1930         if (vnn == NULL) {
1931                 /* if it is not a public ip   it could be our 'single ip' */
1932                 if (ctdb->single_ip_vnn) {
1933                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1934                                 vnn = ctdb->single_ip_vnn;
1935                         }
1936                 }
1937         }
1938         if (vnn == NULL) {
1939                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1940                                  "'%s'not a public address\n",
1941                                  ctdb_addr_to_str(addr)));
1942                 return -1;
1943         }
1944
1945         /* count how many public ip structures we have */
1946         num = 0;
1947         for (;vnn->ifaces[num];) {
1948                 num++;
1949         }
1950
1951         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
1952                 num*sizeof(struct ctdb_control_iface_info);
1953         info = talloc_zero_size(outdata, len);
1954         CTDB_NO_MEMORY(ctdb, info);
1955
1956         info->ip.addr = vnn->public_address;
1957         info->ip.pnn = vnn->pnn;
1958         info->active_idx = 0xFFFFFFFF;
1959
1960         for (i=0; vnn->ifaces[i]; i++) {
1961                 struct ctdb_iface *cur;
1962
1963                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
1964                 if (cur == NULL) {
1965                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
1966                                            vnn->ifaces[i]));
1967                         return -1;
1968                 }
1969                 if (vnn->iface == cur) {
1970                         info->active_idx = i;
1971                 }
1972                 strcpy(info->ifaces[i].name, cur->name);
1973                 info->ifaces[i].link_state = cur->link_up;
1974                 info->ifaces[i].references = cur->references;
1975         }
1976         info->num = i;
1977         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
1978                 i*sizeof(struct ctdb_control_iface_info);
1979
1980         outdata->dsize = len;
1981         outdata->dptr  = (uint8_t *)info;
1982
1983         return 0;
1984 }
1985
1986 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
1987                                 struct ctdb_req_control *c,
1988                                 TDB_DATA *outdata)
1989 {
1990         int i, num, len;
1991         struct ctdb_control_get_ifaces *ifaces;
1992         struct ctdb_iface *cur;
1993
1994         /* count how many public ip structures we have */
1995         num = 0;
1996         for (cur=ctdb->ifaces;cur;cur=cur->next) {
1997                 num++;
1998         }
1999
2000         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2001                 num*sizeof(struct ctdb_control_iface_info);
2002         ifaces = talloc_zero_size(outdata, len);
2003         CTDB_NO_MEMORY(ctdb, ifaces);
2004
2005         i = 0;
2006         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2007                 strcpy(ifaces->ifaces[i].name, cur->name);
2008                 ifaces->ifaces[i].link_state = cur->link_up;
2009                 ifaces->ifaces[i].references = cur->references;
2010                 i++;
2011         }
2012         ifaces->num = i;
2013         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2014                 i*sizeof(struct ctdb_control_iface_info);
2015
2016         outdata->dsize = len;
2017         outdata->dptr  = (uint8_t *)ifaces;
2018
2019         return 0;
2020 }
2021
2022 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2023                                     struct ctdb_req_control *c,
2024                                     TDB_DATA indata)
2025 {
2026         struct ctdb_control_iface_info *info;
2027         struct ctdb_iface *iface;
2028         bool link_up = false;
2029
2030         info = (struct ctdb_control_iface_info *)indata.dptr;
2031
2032         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2033                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2034                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2035                                   len, len, info->name));
2036                 return -1;
2037         }
2038
2039         switch (info->link_state) {
2040         case 0:
2041                 link_up = false;
2042                 break;
2043         case 1:
2044                 link_up = true;
2045                 break;
2046         default:
2047                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2048                                   (unsigned int)info->link_state));
2049                 return -1;
2050         }
2051
2052         if (info->references != 0) {
2053                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2054                                   (unsigned int)info->references));
2055                 return -1;
2056         }
2057
2058         iface = ctdb_find_iface(ctdb, info->name);
2059         if (iface == NULL) {
2060                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2061                                   info->name));
2062                 return -1;
2063         }
2064
2065         if (link_up == iface->link_up) {
2066                 return 0;
2067         }
2068
2069         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2070               ("iface[%s] has changed it's link status %s => %s\n",
2071                iface->name,
2072                iface->link_up?"up":"down",
2073                link_up?"up":"down"));
2074
2075         iface->link_up = link_up;
2076         return 0;
2077 }
2078
2079
/* 
   structure containing the listening socket and the list of tcp connections
   that the ctdb daemon is to kill

   One of these hangs off a vnn (vnn->killtcp) while there are connections
   left to reset for that address.
*/
struct ctdb_kill_tcp {
        /* the public address this state belongs to; the destructor
           clears vnn->killtcp again */
        struct ctdb_vnn *vnn;
        struct ctdb_context *ctdb;
        /* capture socket, -1 until ctdb_sys_open_capture_socket() succeeded */
        int capture_fd;
        /* read event on capture_fd, handled by capture_tcp_handler() */
        struct fd_event *fde;
        /* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
        trbt_tree_t *connections;
        /* opaque value filled in by ctdb_sys_open_capture_socket() and
           passed back to ctdb_sys_read_tcp_packet() */
        void *private_data;
};
2092
/*
  a tcp connection that is to be killed
 */
struct ctdb_killtcp_con {
        /* the two endpoints of the connection */
        ctdb_sock_addr src_addr;
        ctdb_sock_addr dst_addr;
        /* number of repeated tickles sent so far; the connection is
           given up once this reaches 5 */
        int count;
        /* the per-vnn state this connection is stored in */
        struct ctdb_kill_tcp *killtcp;
};
2102
2103 /* this function is used to create a key to represent this socketpair
2104    in the killtcp tree.
2105    this key is used to insert and lookup matching socketpairs that are
2106    to be tickled and RST
2107 */
2108 #define KILLTCP_KEYLEN  10
2109 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2110 {
2111         static uint32_t key[KILLTCP_KEYLEN];
2112
2113         bzero(key, sizeof(key));
2114
2115         if (src->sa.sa_family != dst->sa.sa_family) {
2116                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2117                 return key;
2118         }
2119         
2120         switch (src->sa.sa_family) {
2121         case AF_INET:
2122                 key[0]  = dst->ip.sin_addr.s_addr;
2123                 key[1]  = src->ip.sin_addr.s_addr;
2124                 key[2]  = dst->ip.sin_port;
2125                 key[3]  = src->ip.sin_port;
2126                 break;
2127         case AF_INET6:
2128                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2129                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2130                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2131                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2132                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2133                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2134                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2135                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2136                 key[8]  = dst->ip6.sin6_port;
2137                 key[9]  = src->ip6.sin6_port;
2138                 break;
2139         default:
2140                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2141                 return key;
2142         }
2143
2144         return key;
2145 }
2146
2147 /*
2148   called when we get a read event on the raw socket
2149  */
2150 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2151                                 uint16_t flags, void *private_data)
2152 {
2153         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2154         struct ctdb_killtcp_con *con;
2155         ctdb_sock_addr src, dst;
2156         uint32_t ack_seq, seq;
2157
2158         if (!(flags & EVENT_FD_READ)) {
2159                 return;
2160         }
2161
2162         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2163                                 killtcp->private_data,
2164                                 &src, &dst,
2165                                 &ack_seq, &seq) != 0) {
2166                 /* probably a non-tcp ACK packet */
2167                 return;
2168         }
2169
2170         /* check if we have this guy in our list of connections
2171            to kill
2172         */
2173         con = trbt_lookuparray32(killtcp->connections, 
2174                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2175         if (con == NULL) {
2176                 /* no this was some other packet we can just ignore */
2177                 return;
2178         }
2179
2180         /* This one has been tickled !
2181            now reset him and remove him from the list.
2182          */
2183         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2184                 ntohs(con->dst_addr.ip.sin_port),
2185                 ctdb_addr_to_str(&con->src_addr),
2186                 ntohs(con->src_addr.ip.sin_port)));
2187
2188         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2189         talloc_free(con);
2190 }
2191
2192
2193 /* when traversing the list of all tcp connections to send tickle acks to
2194    (so that we can capture the ack coming back and kill the connection
2195     by a RST)
2196    this callback is called for each connection we are currently trying to kill
2197 */
2198 static void tickle_connection_traverse(void *param, void *data)
2199 {
2200         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2201
2202         /* have tried too many times, just give up */
2203         if (con->count >= 5) {
2204                 talloc_free(con);
2205                 return;
2206         }
2207
2208         /* othervise, try tickling it again */
2209         con->count++;
2210         ctdb_sys_send_tcp(
2211                 (ctdb_sock_addr *)&con->dst_addr,
2212                 (ctdb_sock_addr *)&con->src_addr,
2213                 0, 0, 0);
2214 }
2215
2216
2217 /* 
2218    called every second until all sentenced connections have been reset
2219  */
2220 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2221                                               struct timeval t, void *private_data)
2222 {
2223         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2224
2225
2226         /* loop over all connections sending tickle ACKs */
2227         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2228
2229
2230         /* If there are no more connections to kill we can remove the
2231            entire killtcp structure
2232          */
2233         if ( (killtcp->connections == NULL) || 
2234              (killtcp->connections->root == NULL) ) {
2235                 talloc_free(killtcp);
2236                 return;
2237         }
2238
2239         /* try tickling them again in a seconds time
2240          */
2241         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2242                         ctdb_tickle_sentenced_connections, killtcp);
2243 }
2244
2245 /*
2246   destroy the killtcp structure
2247  */
2248 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2249 {
2250         killtcp->vnn->killtcp = NULL;
2251         return 0;
2252 }
2253
2254
/* insert callback for the killtcp tree: always keep the new connection
   structure ('parm') and ignore whatever was stored before ('data').

   The old structure is deliberately not freed here; per the original
   author's note it is owned by the same tree node and goes away together
   with the new data (rb_tree internals, not visible in this file).
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
2266
2267 /*
2268   add a tcp socket to the list of connections we want to RST
2269  */
2270 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2271                                        ctdb_sock_addr *s,
2272                                        ctdb_sock_addr *d)
2273 {
2274         ctdb_sock_addr src, dst;
2275         struct ctdb_kill_tcp *killtcp;
2276         struct ctdb_killtcp_con *con;
2277         struct ctdb_vnn *vnn;
2278
2279         ctdb_canonicalize_ip(s, &src);
2280         ctdb_canonicalize_ip(d, &dst);
2281
2282         vnn = find_public_ip_vnn(ctdb, &dst);
2283         if (vnn == NULL) {
2284                 vnn = find_public_ip_vnn(ctdb, &src);
2285         }
2286         if (vnn == NULL) {
2287                 /* if it is not a public ip   it could be our 'single ip' */
2288                 if (ctdb->single_ip_vnn) {
2289                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2290                                 vnn = ctdb->single_ip_vnn;
2291                         }
2292                 }
2293         }
2294         if (vnn == NULL) {
2295                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2296                 return -1;
2297         }
2298
2299         killtcp = vnn->killtcp;
2300         
2301         /* If this is the first connection to kill we must allocate
2302            a new structure
2303          */
2304         if (killtcp == NULL) {
2305                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2306                 CTDB_NO_MEMORY(ctdb, killtcp);
2307
2308                 killtcp->vnn         = vnn;
2309                 killtcp->ctdb        = ctdb;
2310                 killtcp->capture_fd  = -1;
2311                 killtcp->connections = trbt_create(killtcp, 0);
2312
2313                 vnn->killtcp         = killtcp;
2314                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2315         }
2316
2317
2318
2319         /* create a structure that describes this connection we want to
2320            RST and store it in killtcp->connections
2321         */
2322         con = talloc(killtcp, struct ctdb_killtcp_con);
2323         CTDB_NO_MEMORY(ctdb, con);
2324         con->src_addr = src;
2325         con->dst_addr = dst;
2326         con->count    = 0;
2327         con->killtcp  = killtcp;
2328
2329
2330         trbt_insertarray32_callback(killtcp->connections,
2331                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2332                         add_killtcp_callback, con);
2333
2334         /* 
2335            If we dont have a socket to listen on yet we must create it
2336          */
2337         if (killtcp->capture_fd == -1) {
2338                 const char *iface = ctdb_vnn_iface_string(vnn);
2339                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2340                 if (killtcp->capture_fd == -1) {
2341                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2342                                           "socket on iface '%s' for killtcp (%s)\n",
2343                                           iface, strerror(errno)));
2344                         goto failed;
2345                 }
2346         }
2347
2348
2349         if (killtcp->fde == NULL) {
2350                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2351                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
2352                                             capture_tcp_handler, killtcp);
2353
2354                 /* We also need to set up some events to tickle all these connections
2355                    until they are all reset
2356                 */
2357                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2358                                 ctdb_tickle_sentenced_connections, killtcp);
2359         }
2360
2361         /* tickle him once now */
2362         ctdb_sys_send_tcp(
2363                 &con->dst_addr,
2364                 &con->src_addr,
2365                 0, 0, 0);
2366
2367         return 0;
2368
2369 failed:
2370         talloc_free(vnn->killtcp);
2371         vnn->killtcp = NULL;
2372         return -1;
2373 }
2374
2375 /*
2376   kill a TCP connection.
2377  */
2378 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2379 {
2380         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2381
2382         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2383 }
2384
2385 /*
2386   called by a daemon to inform us of the entire list of TCP tickles for
2387   a particular public address.
2388   this control should only be sent by the node that is currently serving
2389   that public address.
2390  */
2391 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2392 {
2393         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2394         struct ctdb_tcp_array *tcparray;
2395         struct ctdb_vnn *vnn;
2396
2397         /* We must at least have tickles.num or else we cant verify the size
2398            of the received data blob
2399          */
2400         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2401                                         tickles.connections)) {
2402                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2403                 return -1;
2404         }
2405
2406         /* verify that the size of data matches what we expect */
2407         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2408                                 tickles.connections)
2409                          + sizeof(struct ctdb_tcp_connection)
2410                                  * list->tickles.num) {
2411                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2412                 return -1;
2413         }       
2414
2415         vnn = find_public_ip_vnn(ctdb, &list->addr);
2416         if (vnn == NULL) {
2417                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2418                         ctdb_addr_to_str(&list->addr)));
2419
2420                 return 1;
2421         }
2422
2423         /* remove any old ticklelist we might have */
2424         talloc_free(vnn->tcp_array);
2425         vnn->tcp_array = NULL;
2426
2427         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2428         CTDB_NO_MEMORY(ctdb, tcparray);
2429
2430         tcparray->num = list->tickles.num;
2431
2432         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2433         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2434
2435         memcpy(tcparray->connections, &list->tickles.connections[0], 
2436                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2437
2438         /* We now have a new fresh tickle list array for this vnn */
2439         vnn->tcp_array = talloc_steal(vnn, tcparray);
2440         
2441         return 0;
2442 }
2443
2444 /*
2445   called to return the full list of tickles for the puclic address associated 
2446   with the provided vnn
2447  */
2448 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2449 {
2450         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2451         struct ctdb_control_tcp_tickle_list *list;
2452         struct ctdb_tcp_array *tcparray;
2453         int num;
2454         struct ctdb_vnn *vnn;
2455
2456         vnn = find_public_ip_vnn(ctdb, addr);
2457         if (vnn == NULL) {
2458                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2459                         ctdb_addr_to_str(addr)));
2460
2461                 return 1;
2462         }
2463
2464         tcparray = vnn->tcp_array;
2465         if (tcparray) {
2466                 num = tcparray->num;
2467         } else {
2468                 num = 0;
2469         }
2470
2471         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2472                                 tickles.connections)
2473                         + sizeof(struct ctdb_tcp_connection) * num;
2474
2475         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2476         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2477         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2478
2479         list->addr = *addr;
2480         list->tickles.num = num;
2481         if (num) {
2482                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2483                         sizeof(struct ctdb_tcp_connection) * num);
2484         }
2485
2486         return 0;
2487 }
2488
2489
2490 /*
2491   set the list of all tcp tickles for a public address
2492  */
2493 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2494                               struct timeval timeout, uint32_t destnode, 
2495                               ctdb_sock_addr *addr,
2496                               struct ctdb_tcp_array *tcparray)
2497 {
2498         int ret, num;
2499         TDB_DATA data;
2500         struct ctdb_control_tcp_tickle_list *list;
2501
2502         if (tcparray) {
2503                 num = tcparray->num;
2504         } else {
2505                 num = 0;
2506         }
2507
2508         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2509                                 tickles.connections) +
2510                         sizeof(struct ctdb_tcp_connection) * num;
2511         data.dptr = talloc_size(ctdb, data.dsize);
2512         CTDB_NO_MEMORY(ctdb, data.dptr);
2513
2514         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2515         list->addr = *addr;
2516         list->tickles.num = num;
2517         if (tcparray) {
2518                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2519         }
2520
2521         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2522                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2523                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2524         if (ret != 0) {
2525                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2526                 return -1;
2527         }
2528
2529         talloc_free(data.dptr);
2530
2531         return ret;
2532 }
2533
2534
2535 /*
2536   perform tickle updates if required
2537  */
2538 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2539                                 struct timed_event *te, 
2540                                 struct timeval t, void *private_data)
2541 {
2542         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2543         int ret;
2544         struct ctdb_vnn *vnn;
2545
2546         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2547                 /* we only send out updates for public addresses that 
2548                    we have taken over
2549                  */
2550                 if (ctdb->pnn != vnn->pnn) {
2551                         continue;
2552                 }
2553                 /* We only send out the updates if we need to */
2554                 if (!vnn->tcp_update_needed) {
2555                         continue;
2556                 }
2557                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2558                                 TAKEOVER_TIMEOUT(),
2559                                 CTDB_BROADCAST_CONNECTED,
2560                                 &vnn->public_address,
2561                                 vnn->tcp_array);
2562                 if (ret != 0) {
2563                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2564                                 ctdb_addr_to_str(&vnn->public_address)));
2565                 }
2566         }
2567
2568         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2569                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2570                              ctdb_update_tcp_tickles, ctdb);
2571 }               
2572         
2573
2574 /*
2575   start periodic update of tcp tickles
2576  */
2577 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2578 {
2579         ctdb->tickle_update_context = talloc_new(ctdb);
2580
2581         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2582                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2583                              ctdb_update_tcp_tickles, ctdb);
2584 }
2585
2586
2587
2588
2589 struct control_gratious_arp {
2590         struct ctdb_context *ctdb;
2591         ctdb_sock_addr addr;
2592         const char *iface;
2593         int count;
2594 };
2595
2596 /*
2597   send a control_gratuitous arp
2598  */
2599 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2600                                   struct timeval t, void *private_data)
2601 {
2602         int ret;
2603         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2604                                                         struct control_gratious_arp);
2605
2606         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2607         if (ret != 0) {
2608                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2609                                  arp->iface, strerror(errno)));
2610         }
2611
2612
2613         arp->count++;
2614         if (arp->count == CTDB_ARP_REPEAT) {
2615                 talloc_free(arp);
2616                 return;
2617         }
2618
2619         event_add_timed(arp->ctdb->ev, arp, 
2620                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2621                         send_gratious_arp, arp);
2622 }
2623
2624
2625 /*
2626   send a gratious arp 
2627  */
2628 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2629 {
2630         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2631         struct control_gratious_arp *arp;
2632
2633         /* verify the size of indata */
2634         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2635                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2636                                  (unsigned)indata.dsize, 
2637                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2638                 return -1;
2639         }
2640         if (indata.dsize != 
2641                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2642                 + gratious_arp->len ) ){
2643
2644                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2645                         "but should be %u bytes\n", 
2646                          (unsigned)indata.dsize, 
2647                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2648                 return -1;
2649         }
2650
2651
2652         arp = talloc(ctdb, struct control_gratious_arp);
2653         CTDB_NO_MEMORY(ctdb, arp);
2654
2655         arp->ctdb  = ctdb;
2656         arp->addr   = gratious_arp->addr;
2657         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2658         CTDB_NO_MEMORY(ctdb, arp->iface);
2659         arp->count = 0;
2660         
2661         event_add_timed(arp->ctdb->ev, arp, 
2662                         timeval_zero(), send_gratious_arp, arp);
2663
2664         return 0;
2665 }
2666
2667 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2668 {
2669         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2670         int ret;
2671
2672         /* verify the size of indata */
2673         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2674                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2675                 return -1;
2676         }
2677         if (indata.dsize != 
2678                 ( offsetof(struct ctdb_control_ip_iface, iface)
2679                 + pub->len ) ){
2680
2681                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2682                         "but should be %u bytes\n", 
2683                          (unsigned)indata.dsize, 
2684                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2685                 return -1;
2686         }
2687
2688         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2689
2690         if (ret != 0) {
2691                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2692                 return -1;
2693         }
2694
2695         return 0;
2696 }
2697
/*
  Called when the releaseip event started for del_public_address
  finishes.  private_data is freed whatever the status was; ctdb and
  status are not used.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
                                void *private_data)
{
        void *event_ctx = private_data;

        talloc_free(event_ctx);
}
2706
2707 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2708 {
2709         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2710         struct ctdb_vnn *vnn;
2711         int ret;
2712
2713         /* verify the size of indata */
2714         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2715                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2716                 return -1;
2717         }
2718         if (indata.dsize != 
2719                 ( offsetof(struct ctdb_control_ip_iface, iface)
2720                 + pub->len ) ){
2721
2722                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2723                         "but should be %u bytes\n", 
2724                          (unsigned)indata.dsize, 
2725                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2726                 return -1;
2727         }
2728
2729         /* walk over all public addresses until we find a match */
2730         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2731                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2732                         TALLOC_CTX *mem_ctx;
2733
2734                         DLIST_REMOVE(ctdb->vnn, vnn);
2735                         if (vnn->iface == NULL) {
2736                                 talloc_free(vnn);
2737                                 return 0;
2738                         }
2739
2740                         mem_ctx = talloc_new(ctdb);
2741                         ret = ctdb_event_script_callback(ctdb, 
2742                                          mem_ctx, delete_ip_callback, mem_ctx,
2743                                          false,
2744                                          CTDB_EVENT_RELEASE_IP,
2745                                          "%s %s %u",
2746                                          ctdb_vnn_iface_string(vnn),
2747                                          ctdb_addr_to_str(&vnn->public_address),
2748                                          vnn->public_netmask_bits);
2749                         ctdb_vnn_unassign_iface(ctdb, vnn);
2750                         talloc_free(vnn);
2751                         if (ret != 0) {
2752                                 return -1;
2753                         }
2754                         return 0;
2755                 }
2756         }
2757
2758         return -1;
2759 }
2760