lower the loglevel for a debug message for redundant releases of public ips
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/*
  TAKEOVER_TIMEOUT() expands to a timeval_current_ofs() call whose first
  argument is the takeover_timeout tunable; a variable named 'ctdb' must
  be in scope wherever the macro is used
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument of the timeval_current_ofs() call that re-arms the
   arp timer in ctdb_control_send_arp() */
#define CTDB_ARP_INTERVAL 1
/* number of ctdb_control_send_arp() rounds before the arp state is freed */
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface known to ctdb; entries are chained into
  the ctdb->ifaces list
 */
struct ctdb_iface {
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name, matched with strcmp() */
        const char *name;
        /* only interfaces with link_up set are picked by
           ctdb_vnn_best_iface() */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback() while the takeip event
  script runs
 */
struct ctdb_do_takeip_state {
        /* control request that gets the reply once the script is done */
        struct ctdb_req_control *c;
        /* public address entry being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback() while the updateip event
  script runs
 */
struct ctdb_do_updateip_state {
        /* control request that gets the reply once the script is done */
        struct ctdb_req_control *c;
        /* interface the address was on before the move; restored when
           the script fails */
        struct ctdb_iface *old;
        /* public address entry being moved */
        struct ctdb_vnn *vnn;
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
547 /*
548   take over an ip address
549  */
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551                                  struct ctdb_req_control *c,
552                                  TDB_DATA indata,
553                                  bool *async_reply)
554 {
555         int ret;
556         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557         struct ctdb_vnn *vnn;
558         bool have_ip = false;
559         bool do_updateip = false;
560         bool do_takeip = false;
561         struct ctdb_iface *best_iface = NULL;
562
563         if (pip->pnn != ctdb->pnn) {
564                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565                                  "with pnn %d, but we're node %d\n",
566                                  ctdb_addr_to_str(&pip->addr),
567                                  pip->pnn, ctdb->pnn));
568                 return -1;
569         }
570
571         /* update out vnn list */
572         vnn = find_public_ip_vnn(ctdb, &pip->addr);
573         if (vnn == NULL) {
574                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575                         ctdb_addr_to_str(&pip->addr)));
576                 return 0;
577         }
578
579         have_ip = ctdb_sys_have_ip(&pip->addr);
580         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581         if (best_iface == NULL) {
582                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583                                  "a usable interface (old %s, have_ip %d)\n",
584                                  ctdb_addr_to_str(&vnn->public_address),
585                                  vnn->public_netmask_bits,
586                                  ctdb_vnn_iface_string(vnn),
587                                  have_ip));
588                 return -1;
589         }
590
591         if (vnn->iface == NULL && have_ip) {
592                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
593                                   "but we have no interface assigned, has someone manually configured it?"
594                                   "banning ourself\n",
595                                  ctdb_addr_to_str(&vnn->public_address)));
596                 ctdb_ban_self(ctdb);
597                 return -1;
598         }
599
600         if (vnn->pnn != ctdb->pnn && have_ip) {
601                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
602                                   "and we have it on iface[%s], but it was assigned to node %d"
603                                   "and we are node %d, banning ourself\n",
604                                  ctdb_addr_to_str(&vnn->public_address),
605                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
606                 ctdb_ban_self(ctdb);
607                 return -1;
608         }
609
610         if (vnn->iface) {
611                 if (vnn->iface->link_up) {
612                         /* only move when the rebalance gains something */
613                         if (vnn->iface->references > (best_iface->references + 1)) {
614                                 do_updateip = true;
615                         }
616                 } else if (vnn->iface != best_iface) {
617                         do_updateip = true;
618                 }
619         }
620
621         if (!have_ip) {
622                 if (do_updateip) {
623                         ctdb_vnn_unassign_iface(ctdb, vnn);
624                         do_updateip = false;
625                 }
626                 do_takeip = true;
627         }
628
629         if (do_takeip) {
630                 ret = ctdb_do_takeip(ctdb, c, vnn);
631                 if (ret != 0) {
632                         return -1;
633                 }
634         } else if (do_updateip) {
635                 ret = ctdb_do_updateip(ctdb, c, vnn);
636                 if (ret != 0) {
637                         return -1;
638                 }
639         } else {
640                 /*
641                  * The interface is up and the kernel known the ip
642                  * => do nothing
643                  */
644                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
645                         ctdb_addr_to_str(&pip->addr),
646                         vnn->public_netmask_bits,
647                         ctdb_vnn_iface_string(vnn)));
648                 return 0;
649         }
650
651         /* tell ctdb_control.c that we will be replying asynchronously */
652         *async_reply = true;
653
654         return 0;
655 }
656
657 /*
658   takeover an ip address old v4 style
659  */
660 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
661                                 struct ctdb_req_control *c,
662                                 TDB_DATA indata, 
663                                 bool *async_reply)
664 {
665         TDB_DATA data;
666         
667         data.dsize = sizeof(struct ctdb_public_ip);
668         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
669         CTDB_NO_MEMORY(ctdb, data.dptr);
670         
671         memcpy(data.dptr, indata.dptr, indata.dsize);
672         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
673 }
674
675 /*
676   kill any clients that are registered with a IP that is being released
677  */
678 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
679 {
680         struct ctdb_client_ip *ip;
681
682         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
683                 ctdb_addr_to_str(addr)));
684
685         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
686                 ctdb_sock_addr tmp_addr;
687
688                 tmp_addr = ip->addr;
689                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
690                         ip->client_id,
691                         ctdb_addr_to_str(&ip->addr)));
692
693                 if (ctdb_same_ip(&tmp_addr, addr)) {
694                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
695                                                                      ip->client_id, 
696                                                                      struct ctdb_client);
697                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
698                                 ip->client_id,
699                                 ctdb_addr_to_str(&ip->addr),
700                                 client->pid));
701
702                         if (client->pid != 0) {
703                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
704                                         (unsigned)client->pid,
705                                         ctdb_addr_to_str(addr),
706                                         ip->client_id));
707                                 kill(client->pid, SIGKILL);
708                         }
709                 }
710         }
711 }
712
713 /*
714   called when releaseip event finishes
715  */
716 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
717                                 void *private_data)
718 {
719         struct takeover_callback_state *state = 
720                 talloc_get_type(private_data, struct takeover_callback_state);
721         TDB_DATA data;
722
723         if (status == -ETIME) {
724                 ctdb_ban_self(ctdb);
725         }
726
727         /* send a message to all clients of this node telling them
728            that the cluster has been reconfigured and they should
729            release any sockets on this IP */
730         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
731         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
732         data.dsize = strlen((char *)data.dptr)+1;
733
734         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
735
736         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
737
738         /* kill clients that have registered with this IP */
739         release_kill_clients(ctdb, state->addr);
740
741         ctdb_vnn_unassign_iface(ctdb, state->vnn);
742
743         /* the control succeeded */
744         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
745         talloc_free(state);
746 }
747
748 /*
749   release an ip address
750  */
751 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
752                                 struct ctdb_req_control *c,
753                                 TDB_DATA indata, 
754                                 bool *async_reply)
755 {
756         int ret;
757         struct takeover_callback_state *state;
758         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
759         struct ctdb_vnn *vnn;
760
761         /* update our vnn list */
762         vnn = find_public_ip_vnn(ctdb, &pip->addr);
763         if (vnn == NULL) {
764                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
765                         ctdb_addr_to_str(&pip->addr)));
766                 return 0;
767         }
768         vnn->pnn = pip->pnn;
769
770         /* stop any previous arps */
771         talloc_free(vnn->takeover_ctx);
772         vnn->takeover_ctx = NULL;
773
774         if (!ctdb_sys_have_ip(&pip->addr)) {
775                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
776                         ctdb_addr_to_str(&pip->addr),
777                         vnn->public_netmask_bits, 
778                         ctdb_vnn_iface_string(vnn)));
779                 ctdb_vnn_unassign_iface(ctdb, vnn);
780                 return 0;
781         }
782
783         if (vnn->iface == NULL) {
784                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
785                                   "but we have no interface assigned, has someone manually configured it?"
786                                   "banning ourself\n",
787                                  ctdb_addr_to_str(&vnn->public_address)));
788                 ctdb_ban_self(ctdb);
789                 return -1;
790         }
791
792         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
793                 ctdb_addr_to_str(&pip->addr),
794                 vnn->public_netmask_bits, 
795                 ctdb_vnn_iface_string(vnn),
796                 pip->pnn));
797
798         state = talloc(ctdb, struct takeover_callback_state);
799         CTDB_NO_MEMORY(ctdb, state);
800
801         state->c = talloc_steal(state, c);
802         state->addr = talloc(state, ctdb_sock_addr);       
803         CTDB_NO_MEMORY(ctdb, state->addr);
804         *state->addr = pip->addr;
805         state->vnn   = vnn;
806
807         ret = ctdb_event_script_callback(ctdb, 
808                                          state, release_ip_callback, state,
809                                          false,
810                                          CTDB_EVENT_RELEASE_IP,
811                                          "%s %s %u",
812                                          ctdb_vnn_iface_string(vnn),
813                                          ctdb_addr_to_str(&pip->addr),
814                                          vnn->public_netmask_bits);
815         if (ret != 0) {
816                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
817                         ctdb_addr_to_str(&pip->addr),
818                         ctdb_vnn_iface_string(vnn)));
819                 talloc_free(state);
820                 return -1;
821         }
822
823         /* tell the control that we will be reply asynchronously */
824         *async_reply = true;
825         return 0;
826 }
827
828 /*
829   release an ip address old v4 style
830  */
831 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
832                                 struct ctdb_req_control *c,
833                                 TDB_DATA indata, 
834                                 bool *async_reply)
835 {
836         TDB_DATA data;
837         
838         data.dsize = sizeof(struct ctdb_public_ip);
839         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
840         CTDB_NO_MEMORY(ctdb, data.dptr);
841         
842         memcpy(data.dptr, indata.dptr, indata.dsize);
843         return ctdb_control_release_ip(ctdb, c, data, async_reply);
844 }
845
846
847 static int ctdb_add_public_address(struct ctdb_context *ctdb,
848                                    ctdb_sock_addr *addr,
849                                    unsigned mask, const char *ifaces)
850 {
851         struct ctdb_vnn      *vnn;
852         uint32_t num = 0;
853         char *tmp;
854         const char *iface;
855         int i;
856         int ret;
857
858         /* Verify that we dont have an entry for this ip yet */
859         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
860                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
861                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
862                                 ctdb_addr_to_str(addr)));
863                         return -1;
864                 }               
865         }
866
867         /* create a new vnn structure for this ip address */
868         vnn = talloc_zero(ctdb, struct ctdb_vnn);
869         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
870         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
871         tmp = talloc_strdup(vnn, ifaces);
872         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
873         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
874                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
875                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
876                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
877                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
878                 num++;
879         }
880         talloc_free(tmp);
881         vnn->ifaces[num] = NULL;
882         vnn->public_address      = *addr;
883         vnn->public_netmask_bits = mask;
884         vnn->pnn                 = -1;
885
886         for (i=0; vnn->ifaces[i]; i++) {
887                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
888                 if (ret != 0) {
889                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
890                                            "for public_address[%s]\n",
891                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
892                         talloc_free(vnn);
893                         return -1;
894                 }
895         }
896
897         DLIST_ADD(ctdb->vnn, vnn);
898
899         return 0;
900 }
901
902 /*
903   setup the event script directory
904 */
905 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
906 {
907         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
908         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
909         return 0;
910 }
911
912 /*
913   setup the public address lists from a file
914 */
915 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
916 {
917         char **lines;
918         int nlines;
919         int i;
920
921         lines = file_lines_load(alist, &nlines, ctdb);
922         if (lines == NULL) {
923                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
924                 return -1;
925         }
926         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
927                 nlines--;
928         }
929
930         for (i=0;i<nlines;i++) {
931                 unsigned mask;
932                 ctdb_sock_addr addr;
933                 const char *addrstr;
934                 const char *ifaces;
935                 char *tok, *line;
936
937                 line = lines[i];
938                 while ((*line == ' ') || (*line == '\t')) {
939                         line++;
940                 }
941                 if (*line == '#') {
942                         continue;
943                 }
944                 if (strcmp(line, "") == 0) {
945                         continue;
946                 }
947                 tok = strtok(line, " \t");
948                 addrstr = tok;
949                 tok = strtok(NULL, " \t");
950                 if (tok == NULL) {
951                         if (NULL == ctdb->default_public_interface) {
952                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
953                                          i+1));
954                                 talloc_free(lines);
955                                 return -1;
956                         }
957                         ifaces = ctdb->default_public_interface;
958                 } else {
959                         ifaces = tok;
960                 }
961
962                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
963                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
964                         talloc_free(lines);
965                         return -1;
966                 }
967                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
968                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
969                         talloc_free(lines);
970                         return -1;
971                 }
972         }
973
974         talloc_free(lines);
975         return 0;
976 }
977
978 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
979                               const char *iface,
980                               const char *ip)
981 {
982         struct ctdb_vnn *svnn;
983         bool ok;
984         int ret;
985
986         svnn = talloc_zero(ctdb, struct ctdb_vnn);
987         CTDB_NO_MEMORY(ctdb, svnn);
988
989         svnn->ifaces = talloc_array(svnn, const char *, 2);
990         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
991         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
992         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
993         svnn->ifaces[1] = NULL;
994
995         ok = parse_ip(ip, iface, 0, &svnn->public_address);
996         if (!ok) {
997                 talloc_free(svnn);
998                 return -1;
999         }
1000
1001         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1002         if (ret != 0) {
1003                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1004                                    "for single_ip[%s]\n",
1005                                    svnn->ifaces[0],
1006                                    ctdb_addr_to_str(&svnn->public_address)));
1007                 talloc_free(svnn);
1008                 return -1;
1009         }
1010
1011         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1012         if (ret != 0) {
1013                 talloc_free(svnn);
1014                 return -1;
1015         }
1016
1017         ctdb->single_ip_vnn = svnn;
1018         return 0;
1019 }
1020
/* One entry of the singly linked list of public addresses that
   create_merged_ip_list() builds and ctdb_takeover_run() distributes
   across the nodes.
*/
struct ctdb_public_ip_list {
        /* next entry, NULL at the end of the list */
        struct ctdb_public_ip_list *next;
        /* node that serves 'addr'; the code in this file compares and
           assigns -1 to mean "no node" */
        uint32_t pnn;
        /* the public address itself */
        ctdb_sock_addr addr;
};
1026
1027
1028 /* Given a physical node, return the number of
1029    public addresses that is currently assigned to this node.
1030 */
1031 static int node_ip_coverage(struct ctdb_context *ctdb, 
1032         int32_t pnn,
1033         struct ctdb_public_ip_list *ips)
1034 {
1035         int num=0;
1036
1037         for (;ips;ips=ips->next) {
1038                 if (ips->pnn == pnn) {
1039                         num++;
1040                 }
1041         }
1042         return num;
1043 }
1044
1045
1046 /* Check if this is a public ip known to the node, i.e. can that
1047    node takeover this ip ?
1048 */
1049 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1050                 struct ctdb_public_ip_list *ip)
1051 {
1052         struct ctdb_all_public_ips *public_ips;
1053         int i;
1054
1055         public_ips = ctdb->nodes[pnn]->available_public_ips;
1056
1057         if (public_ips == NULL) {
1058                 return -1;
1059         }
1060
1061         for (i=0;i<public_ips->num;i++) {
1062                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1063                         /* yes, this node can serve this public ip */
1064                         return 0;
1065                 }
1066         }
1067
1068         return -1;
1069 }
1070
1071
1072 /* search the node lists list for a node to takeover this ip.
1073    pick the node that currently are serving the least number of ips
1074    so that the ips get spread out evenly.
1075 */
1076 static int find_takeover_node(struct ctdb_context *ctdb, 
1077                 struct ctdb_node_map *nodemap, uint32_t mask, 
1078                 struct ctdb_public_ip_list *ip,
1079                 struct ctdb_public_ip_list *all_ips)
1080 {
1081         int pnn, min=0, num;
1082         int i;
1083
1084         pnn    = -1;
1085         for (i=0;i<nodemap->num;i++) {
1086                 if (nodemap->nodes[i].flags & mask) {
1087                         /* This node is not healty and can not be used to serve
1088                            a public address 
1089                         */
1090                         continue;
1091                 }
1092
1093                 /* verify that this node can serve this ip */
1094                 if (can_node_serve_ip(ctdb, i, ip)) {
1095                         /* no it couldnt   so skip to the next node */
1096                         continue;
1097                 }
1098
1099                 num = node_ip_coverage(ctdb, i, all_ips);
1100                 /* was this the first node we checked ? */
1101                 if (pnn == -1) {
1102                         pnn = i;
1103                         min  = num;
1104                 } else {
1105                         if (num < min) {
1106                                 pnn = i;
1107                                 min  = num;
1108                         }
1109                 }
1110         }       
1111         if (pnn == -1) {
1112                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1113                         ctdb_addr_to_str(&ip->addr)));
1114
1115                 return -1;
1116         }
1117
1118         ip->pnn = pnn;
1119         return 0;
1120 }
1121
1122 #define IP_KEYLEN       4
1123 static uint32_t *ip_key(ctdb_sock_addr *ip)
1124 {
1125         static uint32_t key[IP_KEYLEN];
1126
1127         bzero(key, sizeof(key));
1128
1129         switch (ip->sa.sa_family) {
1130         case AF_INET:
1131                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1132                 break;
1133         case AF_INET6:
1134                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1135                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1136                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1137                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1138                 break;
1139         default:
1140                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1141                 return key;
1142         }
1143
1144         return key;
1145 }
1146
/* Callback handed to trbt_insertarray32_callback() by
   create_merged_ip_list().  It ignores 'data' and returns 'parm'
   unchanged.
   NOTE(review): presumably 'parm' is the new entry and 'data' the one
   already stored under the same key - confirm against rb_tree.
*/
static void *add_ip_callback(void *parm, void *data)
{
        void *result = parm;

        (void)data;
        return result;
}
1151
1152 void getips_count_callback(void *param, void *data)
1153 {
1154         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1155         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1156
1157         new_ip->next = *ip_list;
1158         *ip_list     = new_ip;
1159 }
1160
1161 struct ctdb_public_ip_list *
1162 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
1163 {
1164         int i, j;
1165         struct ctdb_public_ip_list *ip_list;
1166         struct ctdb_all_public_ips *public_ips;
1167         trbt_tree_t *ip_tree;
1168
1169         ip_tree = trbt_create(tmp_ctx, 0);
1170
1171         for (i=0;i<ctdb->num_nodes;i++) {
1172                 public_ips = ctdb->nodes[i]->known_public_ips;
1173
1174                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1175                         continue;
1176                 }
1177
1178                 /* there were no public ips for this node */
1179                 if (public_ips == NULL) {
1180                         continue;
1181                 }               
1182
1183                 for (j=0;j<public_ips->num;j++) {
1184                         struct ctdb_public_ip_list *tmp_ip; 
1185
1186                         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
1187                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1188                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1189                         tmp_ip->addr = public_ips->ips[j].addr;
1190                         tmp_ip->next = NULL;
1191
1192                         trbt_insertarray32_callback(ip_tree,
1193                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1194                                 add_ip_callback,
1195                                 tmp_ip);
1196                 }
1197         }
1198
1199         ip_list = NULL;
1200         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1201
1202         return ip_list;
1203 }
1204
/*
  make any IP alias changes for public addresses that are necessary 

  The function works in these steps:
   1. count the nodes that are neither inactive nor disabled and choose
      the flag mask of nodes that must not serve public addresses
   2. build the merged list of all public addresses
   3. unassign (pnn = -1) addresses whose node is masked out or can not
      serve them
   4. find a node for every unassigned address and, unless disabled by
      the NoIPFailback/DeterministicIPs tunables, rebalance the addresses
   5. send the release controls to the nodes, wait for them, then send
      the takeover controls and wait for those

  All temporary allocations hang off tmp_ctx, which is freed on every
  return path.

  Returns 0 on success and -1 if a control could not be sent or one of
  the async waits failed.
 */
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
        int i, num_healthy, retries;
        struct ctdb_public_ip ip;
        struct ctdb_public_ipv4 ipv4;
        uint32_t mask;
        struct ctdb_public_ip_list *all_ips, *tmp_ip;
        int maxnode, maxnum=0, minnode, minnum=0, num;
        TDB_DATA data;
        struct timeval timeout;
        struct client_async_data *async_data;
        struct ctdb_client_control_state *state;
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);


        ZERO_STRUCT(ip);

        /* Count how many completely healthy nodes we have */
        num_healthy = 0;
        for (i=0;i<nodemap->num;i++) {
                if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
                        num_healthy++;
                }
        }

        if (num_healthy > 0) {
                /* We have healthy nodes, so only consider them for 
                   serving public addresses
                */
                mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
        } else {
                /* We didnt have any completely healthy nodes so
                   use "disabled" nodes as a fallback
                */
                mask = NODE_FLAGS_INACTIVE;
        }

        /* since nodes only know about those public addresses that
           can be served by that particular node, no single node has
           a full list of all public addresses that exist in the cluster.
           Walk over all node structures and create a merged list of
           all public addresses that exist in the cluster.
        */
        all_ips = create_merged_ip_list(ctdb, tmp_ctx);

        /* If we want deterministic ip allocations, i.e. that the ip addresses
           will always be allocated the same way for a specific set of
           available/unavailable nodes.
        */
        if (1 == ctdb->tunable.deterministic_public_ips) {              
                DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
                /* hand the addresses out round-robin in list order */
                for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
                        tmp_ip->pnn = i%nodemap->num;
                }
        }


        /* mark all public addresses with a masked node as being served by
           node -1
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
                        tmp_ip->pnn = -1;
                }
        }

        /* verify that the assigned nodes can serve that public ip
           and set it to -1 if not
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
                        /* this node can not serve this ip. */
                        tmp_ip->pnn = -1;
                }
        }


        /* now we must redistribute all public addresses with takeover node
           -1 among the nodes available
        */
        retries = 0;
try_again:
        /* loop over all ip's and find a physical node to cover for 
           each unassigned ip.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
                                DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
                                        ctdb_addr_to_str(&tmp_ip->addr)));
                        }
                }
        }

        /* If we dont want ips to fail back after a node becomes healthy
           again, we wont even try to reallocate the ip addresses so that
           they are evenly spread out.
           This can NOT be used at the same time as DeterministicIPs !
        */
        if (1 == ctdb->tunable.no_ip_failback) {
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
                }
                goto finished;
        }


        /* now, try to make sure the ip addresses are evenly distributed
           across the node.
           for each ip address, loop over all nodes that can serve this
           ip and make sure that the difference between the node
           serving the most and the node serving the least ip's are not greater
           than 1.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }

                /* Get the highest and lowest number of ips's served by any 
                   valid node which can serve this ip.
                */
                maxnode = -1;
                minnode = -1;
                for (i=0;i<nodemap->num;i++) {
                        if (nodemap->nodes[i].flags & mask) {
                                continue;
                        }

                        /* only check nodes that can actually serve this ip */
                        if (can_node_serve_ip(ctdb, i, tmp_ip)) {
                                /* no it couldnt   so skip to the next node */
                                continue;
                        }

                        num = node_ip_coverage(ctdb, i, all_ips);
                        if (maxnode == -1) {
                                maxnode = i;
                                maxnum  = num;
                        } else {
                                if (num > maxnum) {
                                        maxnode = i;
                                        maxnum  = num;
                                }
                        }
                        if (minnode == -1) {
                                minnode = i;
                                minnum  = num;
                        } else {
                                if (num < minnum) {
                                        minnode = i;
                                        minnum  = num;
                                }
                        }
                }
                if (maxnode == -1) {
                        DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
                                ctdb_addr_to_str(&tmp_ip->addr)));

                        continue;
                }

                /* If we want deterministic IPs then dont try to reallocate 
                   them to spread out the load.
                */
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        continue;
                }

                /* if the spread between the smallest and largest coverage by
                   a node is >=2 we steal one of the ips from the node with
                   most coverage to even things out a bit.
                   try to do this at most 5 times  since we dont want to spend
                   too much time balancing the ip coverage.
                */
                if ( (maxnum > minnum+1)
                  && (retries < 5) ){
                        struct ctdb_public_ip_list *tmp;

                        /* mark one of maxnode's vnn's as unassigned and try
                           again
                        */
                        for (tmp=all_ips;tmp;tmp=tmp->next) {
                                if (tmp->pnn == maxnode) {
                                        tmp->pnn = -1;
                                        retries++;
                                        goto try_again;
                                }
                        }
                }
        }


        /* finished distributing the public addresses, now just send the 
           info out to the nodes
        */
finished:

        /* at this point ->pnn is the node which will own each IP
           or -1 if there is no node that can cover this ip
        */

        /* now tell all nodes to delete any alias that they should not
           have.  This will be a NOOP on nodes that don't currently
           hold the given alias */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);

        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
                if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
                        continue;
                }

                for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                        if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
                                /* This node should be serving this
                                   vnn so dont tell it to release the ip
                                */
                                continue;
                        }
                        /* ipv4 addresses go out with the old style
                           IPv4 control, everything else with the new
                           style control
                        */
                        if (tmp_ip->addr.sa.sa_family == AF_INET) {
                                ipv4.pnn = tmp_ip->pnn;
                                ipv4.sin = tmp_ip->addr.ip;

                                timeout = TAKEOVER_TIMEOUT();
                                data.dsize = sizeof(ipv4);
                                data.dptr  = (uint8_t *)&ipv4;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IPv4, 0,
                                                data, async_data,
                                                &timeout, NULL);
                        } else {
                                ip.pnn  = tmp_ip->pnn;
                                ip.addr = tmp_ip->addr;

                                timeout = TAKEOVER_TIMEOUT();
                                data.dsize = sizeof(ip);
                                data.dptr  = (uint8_t *)&ip;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IP, 0,
                                                data, async_data,
                                                &timeout, NULL);
                        }

                        if (state == NULL) {
                                DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
                                talloc_free(tmp_ctx);
                                return -1;
                        }
                
                        ctdb_client_async_add(async_data, state);
                }
        }
        /* all releases must have completed before any takeover is sent */
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }
        talloc_free(async_data);


        /* tell all nodes to get their own IPs */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        /* this IP won't be taken over */
                        continue;
                }

                if (tmp_ip->addr.sa.sa_family == AF_INET) {
                        ipv4.pnn = tmp_ip->pnn;
                        ipv4.sin = tmp_ip->addr.ip;

                        timeout = TAKEOVER_TIMEOUT();
                        data.dsize = sizeof(ipv4);
                        data.dptr  = (uint8_t *)&ipv4;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
                                        data, async_data,
                                        &timeout, NULL);
                } else {
                        ip.pnn  = tmp_ip->pnn;
                        ip.addr = tmp_ip->addr;

                        timeout = TAKEOVER_TIMEOUT();
                        data.dsize = sizeof(ip);
                        data.dptr  = (uint8_t *)&ip;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IP, 0,
                                        data, async_data,
                                        &timeout, NULL);
                }
                if (state == NULL) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
                        talloc_free(tmp_ctx);
                        return -1;
                }
                
                ctdb_client_async_add(async_data, state);
        }
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }

        talloc_free(tmp_ctx);
        return 0;
}
1525
1526
1527 /*
1528   destroy a ctdb_client_ip structure
1529  */
1530 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1531 {
1532         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1533                 ctdb_addr_to_str(&ip->addr),
1534                 ntohs(ip->addr.ip.sin_port),
1535                 ip->client_id));
1536
1537         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1538         return 0;
1539 }
1540
1541 /*
1542   called by a client to inform us of a TCP connection that it is managing
1543   that should tickled with an ACK when IP takeover is done
1544   we handle both the old ipv4 style of packets as well as the new ipv4/6
1545   pdus.
1546  */
1547 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1548                                 TDB_DATA indata)
1549 {
1550         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1551         struct ctdb_control_tcp *old_addr = NULL;
1552         struct ctdb_control_tcp_addr new_addr;
1553         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1554         struct ctdb_tcp_list *tcp;
1555         struct ctdb_control_tcp_vnn t;
1556         int ret;
1557         TDB_DATA data;
1558         struct ctdb_client_ip *ip;
1559         struct ctdb_vnn *vnn;
1560         ctdb_sock_addr addr;
1561
1562         switch (indata.dsize) {
1563         case sizeof(struct ctdb_control_tcp):
1564                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1565                 ZERO_STRUCT(new_addr);
1566                 tcp_sock = &new_addr;
1567                 tcp_sock->src.ip  = old_addr->src;
1568                 tcp_sock->dest.ip = old_addr->dest;
1569                 break;
1570         case sizeof(struct ctdb_control_tcp_addr):
1571                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1572                 break;
1573         default:
1574                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1575                                  "to ctdb_control_tcp_client. size was %d but "
1576                                  "only allowed sizes are %lu and %lu\n",
1577                                  (int)indata.dsize,
1578                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1579                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1580                 return -1;
1581         }
1582
1583         addr = tcp_sock->src;
1584         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1585         addr = tcp_sock->dest;
1586         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1587
1588         ZERO_STRUCT(addr);
1589         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1590         vnn = find_public_ip_vnn(ctdb, &addr);
1591         if (vnn == NULL) {
1592                 switch (addr.sa.sa_family) {
1593                 case AF_INET:
1594                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1595                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1596                                         ctdb_addr_to_str(&addr)));
1597                         }
1598                         break;
1599                 case AF_INET6:
1600                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1601                                 ctdb_addr_to_str(&addr)));
1602                         break;
1603                 default:
1604                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1605                 }
1606
1607                 return 0;
1608         }
1609
1610         if (vnn->pnn != ctdb->pnn) {
1611                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1612                         ctdb_addr_to_str(&addr),
1613                         client_id, client->pid));
1614                 /* failing this call will tell smbd to die */
1615                 return -1;
1616         }
1617
1618         ip = talloc(client, struct ctdb_client_ip);
1619         CTDB_NO_MEMORY(ctdb, ip);
1620
1621         ip->ctdb      = ctdb;
1622         ip->addr      = addr;
1623         ip->client_id = client_id;
1624         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1625         DLIST_ADD(ctdb->client_ip_list, ip);
1626
1627         tcp = talloc(client, struct ctdb_tcp_list);
1628         CTDB_NO_MEMORY(ctdb, tcp);
1629
1630         tcp->connection.src_addr = tcp_sock->src;
1631         tcp->connection.dst_addr = tcp_sock->dest;
1632
1633         DLIST_ADD(client->tcp_list, tcp);
1634
1635         t.src  = tcp_sock->src;
1636         t.dest = tcp_sock->dest;
1637
1638         data.dptr = (uint8_t *)&t;
1639         data.dsize = sizeof(t);
1640
1641         switch (addr.sa.sa_family) {
1642         case AF_INET:
1643                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1644                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1645                         ctdb_addr_to_str(&tcp_sock->src),
1646                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1647                 break;
1648         case AF_INET6:
1649                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1650                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1651                         ctdb_addr_to_str(&tcp_sock->src),
1652                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1653                 break;
1654         default:
1655                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1656         }
1657
1658
1659         /* tell all nodes about this tcp connection */
1660         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1661                                        CTDB_CONTROL_TCP_ADD,
1662                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1663         if (ret != 0) {
1664                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1665                 return -1;
1666         }
1667
1668         return 0;
1669 }
1670
1671 /*
1672   find a tcp address on a list
1673  */
1674 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1675                                            struct ctdb_tcp_connection *tcp)
1676 {
1677         int i;
1678
1679         if (array == NULL) {
1680                 return NULL;
1681         }
1682
1683         for (i=0;i<array->num;i++) {
1684                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1685                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1686                         return &array->connections[i];
1687                 }
1688         }
1689         return NULL;
1690 }
1691
1692 /*
1693   called by a daemon to inform us of a TCP connection that one of its
1694   clients managing that should tickled with an ACK when IP takeover is
1695   done
1696  */
1697 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1698 {
1699         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1700         struct ctdb_tcp_array *tcparray;
1701         struct ctdb_tcp_connection tcp;
1702         struct ctdb_vnn *vnn;
1703
1704         vnn = find_public_ip_vnn(ctdb, &p->dest);
1705         if (vnn == NULL) {
1706                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1707                         ctdb_addr_to_str(&p->dest)));
1708
1709                 return -1;
1710         }
1711
1712
1713         tcparray = vnn->tcp_array;
1714
1715         /* If this is the first tickle */
1716         if (tcparray == NULL) {
1717                 tcparray = talloc_size(ctdb->nodes, 
1718                         offsetof(struct ctdb_tcp_array, connections) +
1719                         sizeof(struct ctdb_tcp_connection) * 1);
1720                 CTDB_NO_MEMORY(ctdb, tcparray);
1721                 vnn->tcp_array = tcparray;
1722
1723                 tcparray->num = 0;
1724                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1725                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1726
1727                 tcparray->connections[tcparray->num].src_addr = p->src;
1728                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1729                 tcparray->num++;
1730                 return 0;
1731         }
1732
1733
1734         /* Do we already have this tickle ?*/
1735         tcp.src_addr = p->src;
1736         tcp.dst_addr = p->dest;
1737         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1738                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1739                         ctdb_addr_to_str(&tcp.dst_addr),
1740                         ntohs(tcp.dst_addr.ip.sin_port),
1741                         vnn->pnn));
1742                 return 0;
1743         }
1744
1745         /* A new tickle, we must add it to the array */
1746         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1747                                         struct ctdb_tcp_connection,
1748                                         tcparray->num+1);
1749         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1750
1751         vnn->tcp_array = tcparray;
1752         tcparray->connections[tcparray->num].src_addr = p->src;
1753         tcparray->connections[tcparray->num].dst_addr = p->dest;
1754         tcparray->num++;
1755                                 
1756         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1757                 ctdb_addr_to_str(&tcp.dst_addr),
1758                 ntohs(tcp.dst_addr.ip.sin_port),
1759                 vnn->pnn));
1760
1761         return 0;
1762 }
1763
1764
1765 /*
1766   called by a daemon to inform us of a TCP connection that one of its
1767   clients managing that should tickled with an ACK when IP takeover is
1768   done
1769  */
1770 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1771 {
1772         struct ctdb_tcp_connection *tcpp;
1773         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1774
1775         if (vnn == NULL) {
1776                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1777                         ctdb_addr_to_str(&conn->dst_addr)));
1778                 return;
1779         }
1780
1781         /* if the array is empty we cant remove it
1782            and we dont need to do anything
1783          */
1784         if (vnn->tcp_array == NULL) {
1785                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1786                         ctdb_addr_to_str(&conn->dst_addr),
1787                         ntohs(conn->dst_addr.ip.sin_port)));
1788                 return;
1789         }
1790
1791
1792         /* See if we know this connection
1793            if we dont know this connection  then we dont need to do anything
1794          */
1795         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1796         if (tcpp == NULL) {
1797                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1798                         ctdb_addr_to_str(&conn->dst_addr),
1799                         ntohs(conn->dst_addr.ip.sin_port)));
1800                 return;
1801         }
1802
1803
1804         /* We need to remove this entry from the array.
1805            Instead of allocating a new array and copying data to it
1806            we cheat and just copy the last entry in the existing array
1807            to the entry that is to be removed and just shring the 
1808            ->num field
1809          */
1810         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1811         vnn->tcp_array->num--;
1812
1813         /* If we deleted the last entry we also need to remove the entire array
1814          */
1815         if (vnn->tcp_array->num == 0) {
1816                 talloc_free(vnn->tcp_array);
1817                 vnn->tcp_array = NULL;
1818         }               
1819
1820         vnn->tcp_update_needed = true;
1821
1822         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1823                 ctdb_addr_to_str(&conn->src_addr),
1824                 ntohs(conn->src_addr.ip.sin_port)));
1825 }
1826
1827
/*
  Called when a daemon restarts.  The intention is to immediately send
  all tickles for all public addresses we are serving to the new node.

  XXX not implemented yet: nothing is sent and 0 is always returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	const int32_t status = 0;

	return status;
}
1837
1838
1839 /*
1840   called when a client structure goes away - hook to remove
1841   elements from the tcp_list in all daemons
1842  */
1843 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1844 {
1845         while (client->tcp_list) {
1846                 struct ctdb_tcp_list *tcp = client->tcp_list;
1847                 DLIST_REMOVE(client->tcp_list, tcp);
1848                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1849         }
1850 }
1851
1852
1853 /*
1854   release all IPs on shutdown
1855  */
1856 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1857 {
1858         struct ctdb_vnn *vnn;
1859
1860         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1861                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1862                         ctdb_vnn_unassign_iface(ctdb, vnn);
1863                         continue;
1864                 }
1865                 if (!vnn->iface) {
1866                         continue;
1867                 }
1868                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1869                                   ctdb_vnn_iface_string(vnn),
1870                                   ctdb_addr_to_str(&vnn->public_address),
1871                                   vnn->public_netmask_bits);
1872                 release_kill_clients(ctdb, &vnn->public_address);
1873                 ctdb_vnn_unassign_iface(ctdb, vnn);
1874         }
1875 }
1876
1877
1878 /*
1879   get list of public IPs
1880  */
1881 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1882                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1883 {
1884         int i, num, len;
1885         struct ctdb_all_public_ips *ips;
1886         struct ctdb_vnn *vnn;
1887         bool only_available = false;
1888
1889         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1890                 only_available = true;
1891         }
1892
1893         /* count how many public ip structures we have */
1894         num = 0;
1895         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1896                 num++;
1897         }
1898
1899         len = offsetof(struct ctdb_all_public_ips, ips) + 
1900                 num*sizeof(struct ctdb_public_ip);
1901         ips = talloc_zero_size(outdata, len);
1902         CTDB_NO_MEMORY(ctdb, ips);
1903
1904         i = 0;
1905         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1906                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1907                         continue;
1908                 }
1909                 ips->ips[i].pnn  = vnn->pnn;
1910                 ips->ips[i].addr = vnn->public_address;
1911                 i++;
1912         }
1913         ips->num = i;
1914         len = offsetof(struct ctdb_all_public_ips, ips) +
1915                 i*sizeof(struct ctdb_public_ip);
1916
1917         outdata->dsize = len;
1918         outdata->dptr  = (uint8_t *)ips;
1919
1920         return 0;
1921 }
1922
1923
1924 /*
1925   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1926  */
1927 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1928                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1929 {
1930         int i, num, len;
1931         struct ctdb_all_public_ipsv4 *ips;
1932         struct ctdb_vnn *vnn;
1933
1934         /* count how many public ip structures we have */
1935         num = 0;
1936         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1937                 if (vnn->public_address.sa.sa_family != AF_INET) {
1938                         continue;
1939                 }
1940                 num++;
1941         }
1942
1943         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1944                 num*sizeof(struct ctdb_public_ipv4);
1945         ips = talloc_zero_size(outdata, len);
1946         CTDB_NO_MEMORY(ctdb, ips);
1947
1948         outdata->dsize = len;
1949         outdata->dptr  = (uint8_t *)ips;
1950
1951         ips->num = num;
1952         i = 0;
1953         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1954                 if (vnn->public_address.sa.sa_family != AF_INET) {
1955                         continue;
1956                 }
1957                 ips->ips[i].pnn = vnn->pnn;
1958                 ips->ips[i].sin = vnn->public_address.ip;
1959                 i++;
1960         }
1961
1962         return 0;
1963 }
1964
1965 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1966                                         struct ctdb_req_control *c,
1967                                         TDB_DATA indata,
1968                                         TDB_DATA *outdata)
1969 {
1970         int i, num, len;
1971         ctdb_sock_addr *addr;
1972         struct ctdb_control_public_ip_info *info;
1973         struct ctdb_vnn *vnn;
1974
1975         addr = (ctdb_sock_addr *)indata.dptr;
1976
1977         vnn = find_public_ip_vnn(ctdb, addr);
1978         if (vnn == NULL) {
1979                 /* if it is not a public ip   it could be our 'single ip' */
1980                 if (ctdb->single_ip_vnn) {
1981                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1982                                 vnn = ctdb->single_ip_vnn;
1983                         }
1984                 }
1985         }
1986         if (vnn == NULL) {
1987                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1988                                  "'%s'not a public address\n",
1989                                  ctdb_addr_to_str(addr)));
1990                 return -1;
1991         }
1992
1993         /* count how many public ip structures we have */
1994         num = 0;
1995         for (;vnn->ifaces[num];) {
1996                 num++;
1997         }
1998
1999         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2000                 num*sizeof(struct ctdb_control_iface_info);
2001         info = talloc_zero_size(outdata, len);
2002         CTDB_NO_MEMORY(ctdb, info);
2003
2004         info->ip.addr = vnn->public_address;
2005         info->ip.pnn = vnn->pnn;
2006         info->active_idx = 0xFFFFFFFF;
2007
2008         for (i=0; vnn->ifaces[i]; i++) {
2009                 struct ctdb_iface *cur;
2010
2011                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2012                 if (cur == NULL) {
2013                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2014                                            vnn->ifaces[i]));
2015                         return -1;
2016                 }
2017                 if (vnn->iface == cur) {
2018                         info->active_idx = i;
2019                 }
2020                 strcpy(info->ifaces[i].name, cur->name);
2021                 info->ifaces[i].link_state = cur->link_up;
2022                 info->ifaces[i].references = cur->references;
2023         }
2024         info->num = i;
2025         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2026                 i*sizeof(struct ctdb_control_iface_info);
2027
2028         outdata->dsize = len;
2029         outdata->dptr  = (uint8_t *)info;
2030
2031         return 0;
2032 }
2033
2034 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2035                                 struct ctdb_req_control *c,
2036                                 TDB_DATA *outdata)
2037 {
2038         int i, num, len;
2039         struct ctdb_control_get_ifaces *ifaces;
2040         struct ctdb_iface *cur;
2041
2042         /* count how many public ip structures we have */
2043         num = 0;
2044         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2045                 num++;
2046         }
2047
2048         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2049                 num*sizeof(struct ctdb_control_iface_info);
2050         ifaces = talloc_zero_size(outdata, len);
2051         CTDB_NO_MEMORY(ctdb, ifaces);
2052
2053         i = 0;
2054         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2055                 strcpy(ifaces->ifaces[i].name, cur->name);
2056                 ifaces->ifaces[i].link_state = cur->link_up;
2057                 ifaces->ifaces[i].references = cur->references;
2058                 i++;
2059         }
2060         ifaces->num = i;
2061         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2062                 i*sizeof(struct ctdb_control_iface_info);
2063
2064         outdata->dsize = len;
2065         outdata->dptr  = (uint8_t *)ifaces;
2066
2067         return 0;
2068 }
2069
2070 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2071                                     struct ctdb_req_control *c,
2072                                     TDB_DATA indata)
2073 {
2074         struct ctdb_control_iface_info *info;
2075         struct ctdb_iface *iface;
2076         bool link_up = false;
2077
2078         info = (struct ctdb_control_iface_info *)indata.dptr;
2079
2080         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2081                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2082                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2083                                   len, len, info->name));
2084                 return -1;
2085         }
2086
2087         switch (info->link_state) {
2088         case 0:
2089                 link_up = false;
2090                 break;
2091         case 1:
2092                 link_up = true;
2093                 break;
2094         default:
2095                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2096                                   (unsigned int)info->link_state));
2097                 return -1;
2098         }
2099
2100         if (info->references != 0) {
2101                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2102                                   (unsigned int)info->references));
2103                 return -1;
2104         }
2105
2106         iface = ctdb_find_iface(ctdb, info->name);
2107         if (iface == NULL) {
2108                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2109                                   info->name));
2110                 return -1;
2111         }
2112
2113         if (link_up == iface->link_up) {
2114                 return 0;
2115         }
2116
2117         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2118               ("iface[%s] has changed it's link status %s => %s\n",
2119                iface->name,
2120                iface->link_up?"up":"down",
2121                link_up?"up":"down"));
2122
2123         iface->link_up = link_up;
2124         return 0;
2125 }
2126
2127
2128 /* 
2129    structure containing the listening socket and the list of tcp connections
2130    that the ctdb daemon is to kill
2131 */
2132 struct ctdb_kill_tcp {
2133         struct ctdb_vnn *vnn;
2134         struct ctdb_context *ctdb;
2135         int capture_fd;
2136         struct fd_event *fde;
2137         trbt_tree_t *connections;
2138         void *private_data;
2139 };
2140
2141 /*
2142   a tcp connection that is to be killed
2143  */
2144 struct ctdb_killtcp_con {
2145         ctdb_sock_addr src_addr;
2146         ctdb_sock_addr dst_addr;
2147         int count;
2148         struct ctdb_kill_tcp *killtcp;
2149 };
2150
2151 /* this function is used to create a key to represent this socketpair
2152    in the killtcp tree.
2153    this key is used to insert and lookup matching socketpairs that are
2154    to be tickled and RST
2155 */
2156 #define KILLTCP_KEYLEN  10
2157 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2158 {
2159         static uint32_t key[KILLTCP_KEYLEN];
2160
2161         bzero(key, sizeof(key));
2162
2163         if (src->sa.sa_family != dst->sa.sa_family) {
2164                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2165                 return key;
2166         }
2167         
2168         switch (src->sa.sa_family) {
2169         case AF_INET:
2170                 key[0]  = dst->ip.sin_addr.s_addr;
2171                 key[1]  = src->ip.sin_addr.s_addr;
2172                 key[2]  = dst->ip.sin_port;
2173                 key[3]  = src->ip.sin_port;
2174                 break;
2175         case AF_INET6:
2176                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2177                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2178                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2179                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2180                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2181                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2182                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2183                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2184                 key[8]  = dst->ip6.sin6_port;
2185                 key[9]  = src->ip6.sin6_port;
2186                 break;
2187         default:
2188                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2189                 return key;
2190         }
2191
2192         return key;
2193 }
2194
2195 /*
2196   called when we get a read event on the raw socket
2197  */
2198 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2199                                 uint16_t flags, void *private_data)
2200 {
2201         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2202         struct ctdb_killtcp_con *con;
2203         ctdb_sock_addr src, dst;
2204         uint32_t ack_seq, seq;
2205
2206         if (!(flags & EVENT_FD_READ)) {
2207                 return;
2208         }
2209
2210         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2211                                 killtcp->private_data,
2212                                 &src, &dst,
2213                                 &ack_seq, &seq) != 0) {
2214                 /* probably a non-tcp ACK packet */
2215                 return;
2216         }
2217
2218         /* check if we have this guy in our list of connections
2219            to kill
2220         */
2221         con = trbt_lookuparray32(killtcp->connections, 
2222                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2223         if (con == NULL) {
2224                 /* no this was some other packet we can just ignore */
2225                 return;
2226         }
2227
2228         /* This one has been tickled !
2229            now reset him and remove him from the list.
2230          */
2231         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2232                 ntohs(con->dst_addr.ip.sin_port),
2233                 ctdb_addr_to_str(&con->src_addr),
2234                 ntohs(con->src_addr.ip.sin_port)));
2235
2236         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2237         talloc_free(con);
2238 }
2239
2240
2241 /* when traversing the list of all tcp connections to send tickle acks to
2242    (so that we can capture the ack coming back and kill the connection
2243     by a RST)
2244    this callback is called for each connection we are currently trying to kill
2245 */
2246 static void tickle_connection_traverse(void *param, void *data)
2247 {
2248         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2249
2250         /* have tried too many times, just give up */
2251         if (con->count >= 5) {
2252                 talloc_free(con);
2253                 return;
2254         }
2255
2256         /* othervise, try tickling it again */
2257         con->count++;
2258         ctdb_sys_send_tcp(
2259                 (ctdb_sock_addr *)&con->dst_addr,
2260                 (ctdb_sock_addr *)&con->src_addr,
2261                 0, 0, 0);
2262 }
2263
2264
2265 /* 
2266    called every second until all sentenced connections have been reset
2267  */
2268 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2269                                               struct timeval t, void *private_data)
2270 {
2271         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2272
2273
2274         /* loop over all connections sending tickle ACKs */
2275         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2276
2277
2278         /* If there are no more connections to kill we can remove the
2279            entire killtcp structure
2280          */
2281         if ( (killtcp->connections == NULL) || 
2282              (killtcp->connections->root == NULL) ) {
2283                 talloc_free(killtcp);
2284                 return;
2285         }
2286
2287         /* try tickling them again in a seconds time
2288          */
2289         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2290                         ctdb_tickle_sentenced_connections, killtcp);
2291 }
2292
2293 /*
2294   destroy the killtcp structure
2295  */
2296 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2297 {
2298         killtcp->vnn->killtcp = NULL;
2299         return 0;
2300 }
2301
2302
/* Insert callback for the killtcp tree: unconditionally replace any
   existing connection structure with the new one.

   'parm' is the new data and is what gets returned; 'data' (the
   existing entry, if any) is ignored.  According to the original
   author the old entry is deliberately not freed here, as it is
   talloc_stolen by the same tree node and goes away together with the
   new data.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	return replacement;
}
2314
2315 /*
2316   add a tcp socket to the list of connections we want to RST
2317  */
2318 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2319                                        ctdb_sock_addr *s,
2320                                        ctdb_sock_addr *d)
2321 {
2322         ctdb_sock_addr src, dst;
2323         struct ctdb_kill_tcp *killtcp;
2324         struct ctdb_killtcp_con *con;
2325         struct ctdb_vnn *vnn;
2326
2327         ctdb_canonicalize_ip(s, &src);
2328         ctdb_canonicalize_ip(d, &dst);
2329
2330         vnn = find_public_ip_vnn(ctdb, &dst);
2331         if (vnn == NULL) {
2332                 vnn = find_public_ip_vnn(ctdb, &src);
2333         }
2334         if (vnn == NULL) {
2335                 /* if it is not a public ip   it could be our 'single ip' */
2336                 if (ctdb->single_ip_vnn) {
2337                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2338                                 vnn = ctdb->single_ip_vnn;
2339                         }
2340                 }
2341         }
2342         if (vnn == NULL) {
2343                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2344                 return -1;
2345         }
2346
2347         killtcp = vnn->killtcp;
2348         
2349         /* If this is the first connection to kill we must allocate
2350            a new structure
2351          */
2352         if (killtcp == NULL) {
2353                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2354                 CTDB_NO_MEMORY(ctdb, killtcp);
2355
2356                 killtcp->vnn         = vnn;
2357                 killtcp->ctdb        = ctdb;
2358                 killtcp->capture_fd  = -1;
2359                 killtcp->connections = trbt_create(killtcp, 0);
2360
2361                 vnn->killtcp         = killtcp;
2362                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2363         }
2364
2365
2366
2367         /* create a structure that describes this connection we want to
2368            RST and store it in killtcp->connections
2369         */
2370         con = talloc(killtcp, struct ctdb_killtcp_con);
2371         CTDB_NO_MEMORY(ctdb, con);
2372         con->src_addr = src;
2373         con->dst_addr = dst;
2374         con->count    = 0;
2375         con->killtcp  = killtcp;
2376
2377
2378         trbt_insertarray32_callback(killtcp->connections,
2379                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2380                         add_killtcp_callback, con);
2381
2382         /* 
2383            If we dont have a socket to listen on yet we must create it
2384          */
2385         if (killtcp->capture_fd == -1) {
2386                 const char *iface = ctdb_vnn_iface_string(vnn);
2387                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2388                 if (killtcp->capture_fd == -1) {
2389                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2390                                           "socket on iface '%s' for killtcp (%s)\n",
2391                                           iface, strerror(errno)));
2392                         goto failed;
2393                 }
2394         }
2395
2396
2397         if (killtcp->fde == NULL) {
2398                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2399                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
2400                                             capture_tcp_handler, killtcp);
2401
2402                 /* We also need to set up some events to tickle all these connections
2403                    until they are all reset
2404                 */
2405                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2406                                 ctdb_tickle_sentenced_connections, killtcp);
2407         }
2408
2409         /* tickle him once now */
2410         ctdb_sys_send_tcp(
2411                 &con->dst_addr,
2412                 &con->src_addr,
2413                 0, 0, 0);
2414
2415         return 0;
2416
2417 failed:
2418         talloc_free(vnn->killtcp);
2419         vnn->killtcp = NULL;
2420         return -1;
2421 }
2422
2423 /*
2424   kill a TCP connection.
2425  */
2426 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2427 {
2428         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2429
2430         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2431 }
2432
2433 /*
2434   called by a daemon to inform us of the entire list of TCP tickles for
2435   a particular public address.
2436   this control should only be sent by the node that is currently serving
2437   that public address.
2438  */
2439 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2440 {
2441         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2442         struct ctdb_tcp_array *tcparray;
2443         struct ctdb_vnn *vnn;
2444
2445         /* We must at least have tickles.num or else we cant verify the size
2446            of the received data blob
2447          */
2448         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2449                                         tickles.connections)) {
2450                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2451                 return -1;
2452         }
2453
2454         /* verify that the size of data matches what we expect */
2455         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2456                                 tickles.connections)
2457                          + sizeof(struct ctdb_tcp_connection)
2458                                  * list->tickles.num) {
2459                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2460                 return -1;
2461         }       
2462
2463         vnn = find_public_ip_vnn(ctdb, &list->addr);
2464         if (vnn == NULL) {
2465                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2466                         ctdb_addr_to_str(&list->addr)));
2467
2468                 return 1;
2469         }
2470
2471         /* remove any old ticklelist we might have */
2472         talloc_free(vnn->tcp_array);
2473         vnn->tcp_array = NULL;
2474
2475         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2476         CTDB_NO_MEMORY(ctdb, tcparray);
2477
2478         tcparray->num = list->tickles.num;
2479
2480         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2481         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2482
2483         memcpy(tcparray->connections, &list->tickles.connections[0], 
2484                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2485
2486         /* We now have a new fresh tickle list array for this vnn */
2487         vnn->tcp_array = talloc_steal(vnn, tcparray);
2488         
2489         return 0;
2490 }
2491
2492 /*
2493   called to return the full list of tickles for the puclic address associated 
2494   with the provided vnn
2495  */
2496 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2497 {
2498         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2499         struct ctdb_control_tcp_tickle_list *list;
2500         struct ctdb_tcp_array *tcparray;
2501         int num;
2502         struct ctdb_vnn *vnn;
2503
2504         vnn = find_public_ip_vnn(ctdb, addr);
2505         if (vnn == NULL) {
2506                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2507                         ctdb_addr_to_str(addr)));
2508
2509                 return 1;
2510         }
2511
2512         tcparray = vnn->tcp_array;
2513         if (tcparray) {
2514                 num = tcparray->num;
2515         } else {
2516                 num = 0;
2517         }
2518
2519         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2520                                 tickles.connections)
2521                         + sizeof(struct ctdb_tcp_connection) * num;
2522
2523         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2524         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2525         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2526
2527         list->addr = *addr;
2528         list->tickles.num = num;
2529         if (num) {
2530                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2531                         sizeof(struct ctdb_tcp_connection) * num);
2532         }
2533
2534         return 0;
2535 }
2536
2537
2538 /*
2539   set the list of all tcp tickles for a public address
2540  */
2541 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2542                               struct timeval timeout, uint32_t destnode, 
2543                               ctdb_sock_addr *addr,
2544                               struct ctdb_tcp_array *tcparray)
2545 {
2546         int ret, num;
2547         TDB_DATA data;
2548         struct ctdb_control_tcp_tickle_list *list;
2549
2550         if (tcparray) {
2551                 num = tcparray->num;
2552         } else {
2553                 num = 0;
2554         }
2555
2556         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2557                                 tickles.connections) +
2558                         sizeof(struct ctdb_tcp_connection) * num;
2559         data.dptr = talloc_size(ctdb, data.dsize);
2560         CTDB_NO_MEMORY(ctdb, data.dptr);
2561
2562         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2563         list->addr = *addr;
2564         list->tickles.num = num;
2565         if (tcparray) {
2566                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2567         }
2568
2569         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2570                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2571                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2572         if (ret != 0) {
2573                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2574                 return -1;
2575         }
2576
2577         talloc_free(data.dptr);
2578
2579         return ret;
2580 }
2581
2582
2583 /*
2584   perform tickle updates if required
2585  */
2586 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2587                                 struct timed_event *te, 
2588                                 struct timeval t, void *private_data)
2589 {
2590         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2591         int ret;
2592         struct ctdb_vnn *vnn;
2593
2594         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2595                 /* we only send out updates for public addresses that 
2596                    we have taken over
2597                  */
2598                 if (ctdb->pnn != vnn->pnn) {
2599                         continue;
2600                 }
2601                 /* We only send out the updates if we need to */
2602                 if (!vnn->tcp_update_needed) {
2603                         continue;
2604                 }
2605                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2606                                 TAKEOVER_TIMEOUT(),
2607                                 CTDB_BROADCAST_CONNECTED,
2608                                 &vnn->public_address,
2609                                 vnn->tcp_array);
2610                 if (ret != 0) {
2611                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2612                                 ctdb_addr_to_str(&vnn->public_address)));
2613                 }
2614         }
2615
2616         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2617                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2618                              ctdb_update_tcp_tickles, ctdb);
2619 }               
2620         
2621
2622 /*
2623   start periodic update of tcp tickles
2624  */
2625 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2626 {
2627         ctdb->tickle_update_context = talloc_new(ctdb);
2628
2629         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2630                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2631                              ctdb_update_tcp_tickles, ctdb);
2632 }
2633
2634
2635
2636
2637 struct control_gratious_arp {
2638         struct ctdb_context *ctdb;
2639         ctdb_sock_addr addr;
2640         const char *iface;
2641         int count;
2642 };
2643
2644 /*
2645   send a control_gratuitous arp
2646  */
2647 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2648                                   struct timeval t, void *private_data)
2649 {
2650         int ret;
2651         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2652                                                         struct control_gratious_arp);
2653
2654         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2655         if (ret != 0) {
2656                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2657                                  arp->iface, strerror(errno)));
2658         }
2659
2660
2661         arp->count++;
2662         if (arp->count == CTDB_ARP_REPEAT) {
2663                 talloc_free(arp);
2664                 return;
2665         }
2666
2667         event_add_timed(arp->ctdb->ev, arp, 
2668                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2669                         send_gratious_arp, arp);
2670 }
2671
2672
2673 /*
2674   send a gratious arp 
2675  */
2676 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2677 {
2678         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2679         struct control_gratious_arp *arp;
2680
2681         /* verify the size of indata */
2682         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2683                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2684                                  (unsigned)indata.dsize, 
2685                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2686                 return -1;
2687         }
2688         if (indata.dsize != 
2689                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2690                 + gratious_arp->len ) ){
2691
2692                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2693                         "but should be %u bytes\n", 
2694                          (unsigned)indata.dsize, 
2695                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2696                 return -1;
2697         }
2698
2699
2700         arp = talloc(ctdb, struct control_gratious_arp);
2701         CTDB_NO_MEMORY(ctdb, arp);
2702
2703         arp->ctdb  = ctdb;
2704         arp->addr   = gratious_arp->addr;
2705         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2706         CTDB_NO_MEMORY(ctdb, arp->iface);
2707         arp->count = 0;
2708         
2709         event_add_timed(arp->ctdb->ev, arp, 
2710                         timeval_zero(), send_gratious_arp, arp);
2711
2712         return 0;
2713 }
2714
2715 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2716 {
2717         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2718         int ret;
2719
2720         /* verify the size of indata */
2721         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2722                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2723                 return -1;
2724         }
2725         if (indata.dsize != 
2726                 ( offsetof(struct ctdb_control_ip_iface, iface)
2727                 + pub->len ) ){
2728
2729                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2730                         "but should be %u bytes\n", 
2731                          (unsigned)indata.dsize, 
2732                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2733                 return -1;
2734         }
2735
2736         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2737
2738         if (ret != 0) {
2739                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2740                 return -1;
2741         }
2742
2743         return 0;
2744 }
2745
2746 /*
2747   called when releaseip event finishes for del_public_address
2748  */
2749 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2750                                 void *private_data)
2751 {
2752         talloc_free(private_data);
2753 }
2754
2755 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2756 {
2757         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2758         struct ctdb_vnn *vnn;
2759         int ret;
2760
2761         /* verify the size of indata */
2762         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2763                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2764                 return -1;
2765         }
2766         if (indata.dsize != 
2767                 ( offsetof(struct ctdb_control_ip_iface, iface)
2768                 + pub->len ) ){
2769
2770                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2771                         "but should be %u bytes\n", 
2772                          (unsigned)indata.dsize, 
2773                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2774                 return -1;
2775         }
2776
2777         /* walk over all public addresses until we find a match */
2778         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2779                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2780                         TALLOC_CTX *mem_ctx;
2781
2782                         DLIST_REMOVE(ctdb->vnn, vnn);
2783                         if (vnn->iface == NULL) {
2784                                 talloc_free(vnn);
2785                                 return 0;
2786                         }
2787
2788                         mem_ctx = talloc_new(ctdb);
2789                         ret = ctdb_event_script_callback(ctdb, 
2790                                          mem_ctx, delete_ip_callback, mem_ctx,
2791                                          false,
2792                                          CTDB_EVENT_RELEASE_IP,
2793                                          "%s %s %u",
2794                                          ctdb_vnn_iface_string(vnn),
2795                                          ctdb_addr_to_str(&vnn->public_address),
2796                                          vnn->public_netmask_bits);
2797                         ctdb_vnn_unassign_iface(ctdb, vnn);
2798                         talloc_free(vnn);
2799                         if (ret != 0) {
2800                                 return -1;
2801                         }
2802                         return 0;
2803                 }
2804         }
2805
2806         return -1;
2807 }
2808