Merge commit 'rusty/script-report'
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeout built from the takeover_timeout tunable; needs a variable
   named `ctdb` in scope at the point of use */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* first argument given to timeval_current_ofs() when the arp timer is
   re-armed (presumably seconds - confirm against timeval_current_ofs) */
#define CTDB_ARP_INTERVAL 1
/* number of arp/tickle rounds sent before the arp state is freed */
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 if (status == -ETIME) {
132                         ctdb_ban_self(ctdb);
133                 }
134                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
135                         ctdb_addr_to_str(state->addr),
136                         state->vnn->iface));
137                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
138                 talloc_free(state);
139                 return;
140         }
141
142         if (!state->vnn->takeover_ctx) {
143                 state->vnn->takeover_ctx = talloc_new(state->vnn);
144                 if (!state->vnn->takeover_ctx) {
145                         goto failed;
146                 }
147         }
148
149         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
150         if (!arp) goto failed;
151         
152         arp->ctdb = ctdb;
153         arp->addr = *state->addr;
154         arp->vnn  = state->vnn;
155
156         tcparray = state->vnn->tcp_array;
157         if (tcparray) {
158                 /* add all of the known tcp connections for this IP to the
159                    list of tcp connections to send tickle acks for */
160                 arp->tcparray = talloc_steal(arp, tcparray);
161
162                 state->vnn->tcp_array = NULL;
163                 state->vnn->tcp_update_needed = true;
164         }
165
166         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
167                         timeval_zero(), ctdb_control_send_arp, arp);
168
169         /* the control succeeded */
170         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
171         talloc_free(state);
172         return;
173
174 failed:
175         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
176         talloc_free(state);
177         return;
178 }
179
180 /*
181   Find the vnn of the node that has a public ip address
182   returns -1 if the address is not known as a public address
183  */
184 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
185 {
186         struct ctdb_vnn *vnn;
187
188         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
189                 if (ctdb_same_ip(&vnn->public_address, addr)) {
190                         return vnn;
191                 }
192         }
193
194         return NULL;
195 }
196
197
198 /*
199   take over an ip address
200  */
201 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
202                                  struct ctdb_req_control *c,
203                                  TDB_DATA indata, 
204                                  bool *async_reply)
205 {
206         int ret;
207         struct takeover_callback_state *state;
208         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
209         struct ctdb_vnn *vnn;
210
211         /* update out vnn list */
212         vnn = find_public_ip_vnn(ctdb, &pip->addr);
213         if (vnn == NULL) {
214                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
215                         ctdb_addr_to_str(&pip->addr)));
216                 return 0;
217         }
218         vnn->pnn = pip->pnn;
219
220         /* if our kernel already has this IP, do nothing */
221         if (ctdb_sys_have_ip(&pip->addr)) {
222                 return 0;
223         }
224
225         state = talloc(vnn, struct takeover_callback_state);
226         CTDB_NO_MEMORY(ctdb, state);
227
228         state->c = talloc_steal(ctdb, c);
229         state->addr = talloc(ctdb, ctdb_sock_addr);
230         CTDB_NO_MEMORY(ctdb, state->addr);
231
232         *state->addr = pip->addr;
233         state->vnn   = vnn;
234
235         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
236                 ctdb_addr_to_str(&pip->addr),
237                 vnn->public_netmask_bits, 
238                 vnn->iface));
239
240         ret = ctdb_event_script_callback(ctdb, 
241                                          state, takeover_ip_callback, state,
242                                          false,
243                                          CTDB_EVENT_TAKE_IP,
244                                          "%s %s %u",
245                                          vnn->iface, 
246                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
247                                          vnn->public_netmask_bits);
248
249         if (ret != 0) {
250                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
251                         ctdb_addr_to_str(&pip->addr),
252                         vnn->iface));
253                 talloc_free(state);
254                 return -1;
255         }
256
257         /* tell ctdb_control.c that we will be replying asynchronously */
258         *async_reply = true;
259
260         return 0;
261 }
262
263 /*
264   takeover an ip address old v4 style
265  */
266 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
267                                 struct ctdb_req_control *c,
268                                 TDB_DATA indata, 
269                                 bool *async_reply)
270 {
271         TDB_DATA data;
272         
273         data.dsize = sizeof(struct ctdb_public_ip);
274         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
275         CTDB_NO_MEMORY(ctdb, data.dptr);
276         
277         memcpy(data.dptr, indata.dptr, indata.dsize);
278         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
279 }
280
281 /*
282   kill any clients that are registered with a IP that is being released
283  */
284 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
285 {
286         struct ctdb_client_ip *ip;
287
288         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
289                 ctdb_addr_to_str(addr)));
290
291         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
292                 ctdb_sock_addr tmp_addr;
293
294                 tmp_addr = ip->addr;
295                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
296                         ip->client_id,
297                         ctdb_addr_to_str(&ip->addr)));
298
299                 if (ctdb_same_ip(&tmp_addr, addr)) {
300                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
301                                                                      ip->client_id, 
302                                                                      struct ctdb_client);
303                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
304                                 ip->client_id,
305                                 ctdb_addr_to_str(&ip->addr),
306                                 client->pid));
307
308                         if (client->pid != 0) {
309                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
310                                         (unsigned)client->pid,
311                                         ctdb_addr_to_str(addr),
312                                         ip->client_id));
313                                 kill(client->pid, SIGKILL);
314                         }
315                 }
316         }
317 }
318
319 /*
320   called when releaseip event finishes
321  */
322 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
323                                 void *private_data)
324 {
325         struct takeover_callback_state *state = 
326                 talloc_get_type(private_data, struct takeover_callback_state);
327         TDB_DATA data;
328
329         if (status == -ETIME) {
330                 ctdb_ban_self(ctdb);
331         }
332
333         /* send a message to all clients of this node telling them
334            that the cluster has been reconfigured and they should
335            release any sockets on this IP */
336         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
337         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
338         data.dsize = strlen((char *)data.dptr)+1;
339
340         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
341
342         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
343
344         /* kill clients that have registered with this IP */
345         release_kill_clients(ctdb, state->addr);
346         
347         /* the control succeeded */
348         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
349         talloc_free(state);
350 }
351
352 /*
353   release an ip address
354  */
355 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
356                                 struct ctdb_req_control *c,
357                                 TDB_DATA indata, 
358                                 bool *async_reply)
359 {
360         int ret;
361         struct takeover_callback_state *state;
362         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
363         struct ctdb_vnn *vnn;
364
365         /* update our vnn list */
366         vnn = find_public_ip_vnn(ctdb, &pip->addr);
367         if (vnn == NULL) {
368                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
369                         ctdb_addr_to_str(&pip->addr)));
370                 return 0;
371         }
372         vnn->pnn = pip->pnn;
373
374         /* stop any previous arps */
375         talloc_free(vnn->takeover_ctx);
376         vnn->takeover_ctx = NULL;
377
378         if (!ctdb_sys_have_ip(&pip->addr)) {
379                 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
380                         ctdb_addr_to_str(&pip->addr),
381                         vnn->public_netmask_bits, 
382                         vnn->iface));
383                 return 0;
384         }
385
386         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
387                 ctdb_addr_to_str(&pip->addr),
388                 vnn->public_netmask_bits, 
389                 vnn->iface,
390                 pip->pnn));
391
392         state = talloc(ctdb, struct takeover_callback_state);
393         CTDB_NO_MEMORY(ctdb, state);
394
395         state->c = talloc_steal(state, c);
396         state->addr = talloc(state, ctdb_sock_addr);       
397         CTDB_NO_MEMORY(ctdb, state->addr);
398         *state->addr = pip->addr;
399         state->vnn   = vnn;
400
401         ret = ctdb_event_script_callback(ctdb, 
402                                          state, release_ip_callback, state,
403                                          false,
404                                          CTDB_EVENT_RELEASE_IP,
405                                          "%s %s %u",
406                                          vnn->iface, 
407                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
408                                          vnn->public_netmask_bits);
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
411                         ctdb_addr_to_str(&pip->addr),
412                         vnn->iface));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         /* tell the control that we will be reply asynchronously */
418         *async_reply = true;
419         return 0;
420 }
421
422 /*
423   release an ip address old v4 style
424  */
425 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
426                                 struct ctdb_req_control *c,
427                                 TDB_DATA indata, 
428                                 bool *async_reply)
429 {
430         TDB_DATA data;
431         
432         data.dsize = sizeof(struct ctdb_public_ip);
433         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
434         CTDB_NO_MEMORY(ctdb, data.dptr);
435         
436         memcpy(data.dptr, indata.dptr, indata.dsize);
437         return ctdb_control_release_ip(ctdb, c, data, async_reply);
438 }
439
440
441 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
442 {
443         struct ctdb_vnn      *vnn;
444
445         /* Verify that we dont have an entry for this ip yet */
446         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
447                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
448                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
449                                 ctdb_addr_to_str(addr)));
450                         return -1;
451                 }               
452         }
453
454         /* create a new vnn structure for this ip address */
455         vnn = talloc_zero(ctdb, struct ctdb_vnn);
456         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
457         vnn->iface = talloc_strdup(vnn, iface);
458         CTDB_NO_MEMORY(ctdb, vnn->iface);
459         vnn->public_address      = *addr;
460         vnn->public_netmask_bits = mask;
461         vnn->pnn                 = -1;
462         
463         DLIST_ADD(ctdb->vnn, vnn);
464
465         return 0;
466 }
467
468
469 /*
470   setup the event script directory
471 */
472 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
473 {
474         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
475         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
476         return 0;
477 }
478
479 /*
480   setup the public address lists from a file
481 */
482 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
483 {
484         char **lines;
485         int nlines;
486         int i;
487
488         lines = file_lines_load(alist, &nlines, ctdb);
489         if (lines == NULL) {
490                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
491                 return -1;
492         }
493         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
494                 nlines--;
495         }
496
497         for (i=0;i<nlines;i++) {
498                 unsigned mask;
499                 ctdb_sock_addr addr;
500                 const char *addrstr;
501                 const char *iface;
502                 char *tok, *line;
503
504                 line = lines[i];
505                 while ((*line == ' ') || (*line == '\t')) {
506                         line++;
507                 }
508                 if (*line == '#') {
509                         continue;
510                 }
511                 if (strcmp(line, "") == 0) {
512                         continue;
513                 }
514                 tok = strtok(line, " \t");
515                 addrstr = tok;
516                 tok = strtok(NULL, " \t");
517                 if (tok == NULL) {
518                         if (NULL == ctdb->default_public_interface) {
519                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
520                                          i+1));
521                                 talloc_free(lines);
522                                 return -1;
523                         }
524                         iface = ctdb->default_public_interface;
525                 } else {
526                         iface = tok;
527                 }
528
529                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
530                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
531                         talloc_free(lines);
532                         return -1;
533                 }
534                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
535                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
536                         talloc_free(lines);
537                         return -1;
538                 }
539         }
540
541         talloc_free(lines);
542         return 0;
543 }
544
545
546
547
548 struct ctdb_public_ip_list {
549         struct ctdb_public_ip_list *next;
550         uint32_t pnn;
551         ctdb_sock_addr addr;
552 };
553
554
555 /* Given a physical node, return the number of
556    public addresses that is currently assigned to this node.
557 */
558 static int node_ip_coverage(struct ctdb_context *ctdb, 
559         int32_t pnn,
560         struct ctdb_public_ip_list *ips)
561 {
562         int num=0;
563
564         for (;ips;ips=ips->next) {
565                 if (ips->pnn == pnn) {
566                         num++;
567                 }
568         }
569         return num;
570 }
571
572
573 /* Check if this is a public ip known to the node, i.e. can that
574    node takeover this ip ?
575 */
576 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
577                 struct ctdb_public_ip_list *ip)
578 {
579         struct ctdb_all_public_ips *public_ips;
580         int i;
581
582         public_ips = ctdb->nodes[pnn]->public_ips;
583
584         if (public_ips == NULL) {
585                 return -1;
586         }
587
588         for (i=0;i<public_ips->num;i++) {
589                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
590                         /* yes, this node can serve this public ip */
591                         return 0;
592                 }
593         }
594
595         return -1;
596 }
597
598
599 /* search the node lists list for a node to takeover this ip.
600    pick the node that currently are serving the least number of ips
601    so that the ips get spread out evenly.
602 */
603 static int find_takeover_node(struct ctdb_context *ctdb, 
604                 struct ctdb_node_map *nodemap, uint32_t mask, 
605                 struct ctdb_public_ip_list *ip,
606                 struct ctdb_public_ip_list *all_ips)
607 {
608         int pnn, min=0, num;
609         int i;
610
611         pnn    = -1;
612         for (i=0;i<nodemap->num;i++) {
613                 if (nodemap->nodes[i].flags & mask) {
614                         /* This node is not healty and can not be used to serve
615                            a public address 
616                         */
617                         continue;
618                 }
619
620                 /* verify that this node can serve this ip */
621                 if (can_node_serve_ip(ctdb, i, ip)) {
622                         /* no it couldnt   so skip to the next node */
623                         continue;
624                 }
625
626                 num = node_ip_coverage(ctdb, i, all_ips);
627                 /* was this the first node we checked ? */
628                 if (pnn == -1) {
629                         pnn = i;
630                         min  = num;
631                 } else {
632                         if (num < min) {
633                                 pnn = i;
634                                 min  = num;
635                         }
636                 }
637         }       
638         if (pnn == -1) {
639                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
640                         ctdb_addr_to_str(&ip->addr)));
641
642                 return -1;
643         }
644
645         ip->pnn = pnn;
646         return 0;
647 }
648
649 #define IP_KEYLEN       4
650 static uint32_t *ip_key(ctdb_sock_addr *ip)
651 {
652         static uint32_t key[IP_KEYLEN];
653
654         bzero(key, sizeof(key));
655
656         switch (ip->sa.sa_family) {
657         case AF_INET:
658                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
659                 break;
660         case AF_INET6:
661                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
662                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
663                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
664                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
665                 break;
666         default:
667                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
668                 return key;
669         }
670
671         return key;
672 }
673
/*
  insert callback for trbt_insertarray32_callback(): always hands back
  the new entry (parm) and ignores the second argument
 */
static void *add_ip_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
678
679 void getips_count_callback(void *param, void *data)
680 {
681         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
682         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
683
684         new_ip->next = *ip_list;
685         *ip_list     = new_ip;
686 }
687
688 struct ctdb_public_ip_list *
689 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
690 {
691         int i, j;
692         struct ctdb_public_ip_list *ip_list;
693         struct ctdb_all_public_ips *public_ips;
694         trbt_tree_t *ip_tree;
695
696         ip_tree = trbt_create(tmp_ctx, 0);
697
698         for (i=0;i<ctdb->num_nodes;i++) {
699                 public_ips = ctdb->nodes[i]->public_ips;
700
701                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
702                         continue;
703                 }
704
705                 /* there were no public ips for this node */
706                 if (public_ips == NULL) {
707                         continue;
708                 }               
709
710                 for (j=0;j<public_ips->num;j++) {
711                         struct ctdb_public_ip_list *tmp_ip; 
712
713                         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
714                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
715                         tmp_ip->pnn  = public_ips->ips[j].pnn;
716                         tmp_ip->addr = public_ips->ips[j].addr;
717                         tmp_ip->next = NULL;
718
719                         trbt_insertarray32_callback(ip_tree,
720                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
721                                 add_ip_callback,
722                                 tmp_ip);
723                 }
724         }
725
726         ip_list = NULL;
727         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
728
729         return ip_list;
730 }
731
732 /*
733   make any IP alias changes for public addresses that are necessary 
734  */
735 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
736 {
737         int i, num_healthy, retries;
738         struct ctdb_public_ip ip;
739         struct ctdb_public_ipv4 ipv4;
740         uint32_t mask;
741         struct ctdb_public_ip_list *all_ips, *tmp_ip;
742         int maxnode, maxnum=0, minnode, minnum=0, num;
743         TDB_DATA data;
744         struct timeval timeout;
745         struct client_async_data *async_data;
746         struct ctdb_client_control_state *state;
747         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
748
749
750         ZERO_STRUCT(ip);
751
752         /* Count how many completely healthy nodes we have */
753         num_healthy = 0;
754         for (i=0;i<nodemap->num;i++) {
755                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
756                         num_healthy++;
757                 }
758         }
759
760         if (num_healthy > 0) {
761                 /* We have healthy nodes, so only consider them for 
762                    serving public addresses
763                 */
764                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
765         } else {
766                 /* We didnt have any completely healthy nodes so
767                    use "disabled" nodes as a fallback
768                 */
769                 mask = NODE_FLAGS_INACTIVE;
770         }
771
772         /* since nodes only know about those public addresses that
773            can be served by that particular node, no single node has
774            a full list of all public addresses that exist in the cluster.
775            Walk over all node structures and create a merged list of
776            all public addresses that exist in the cluster.
777         */
778         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
779
780         /* If we want deterministic ip allocations, i.e. that the ip addresses
781            will always be allocated the same way for a specific set of
782            available/unavailable nodes.
783         */
784         if (1 == ctdb->tunable.deterministic_public_ips) {              
785                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
786                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
787                         tmp_ip->pnn = i%nodemap->num;
788                 }
789         }
790
791
792         /* mark all public addresses with a masked node as being served by
793            node -1
794         */
795         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
796                 if (tmp_ip->pnn == -1) {
797                         continue;
798                 }
799                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
800                         tmp_ip->pnn = -1;
801                 }
802         }
803
804         /* verify that the assigned nodes can serve that public ip
805            and set it to -1 if not
806         */
807         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
808                 if (tmp_ip->pnn == -1) {
809                         continue;
810                 }
811                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
812                         /* this node can not serve this ip. */
813                         tmp_ip->pnn = -1;
814                 }
815         }
816
817
818         /* now we must redistribute all public addresses with takeover node
819            -1 among the nodes available
820         */
821         retries = 0;
822 try_again:
823         /* loop over all ip's and find a physical node to cover for 
824            each unassigned ip.
825         */
826         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
827                 if (tmp_ip->pnn == -1) {
828                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
829                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
830                                         ctdb_addr_to_str(&tmp_ip->addr)));
831                         }
832                 }
833         }
834
835         /* If we dont want ips to fail back after a node becomes healthy
836            again, we wont even try to reallocat the ip addresses so that
837            they are evenly spread out.
838            This can NOT be used at the same time as DeterministicIPs !
839         */
840         if (1 == ctdb->tunable.no_ip_failback) {
841                 if (1 == ctdb->tunable.deterministic_public_ips) {
842                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
843                 }
844                 goto finished;
845         }
846
847
848         /* now, try to make sure the ip adresses are evenly distributed
849            across the node.
850            for each ip address, loop over all nodes that can serve this
851            ip and make sure that the difference between the node
852            serving the most and the node serving the least ip's are not greater
853            than 1.
854         */
855         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
856                 if (tmp_ip->pnn == -1) {
857                         continue;
858                 }
859
860                 /* Get the highest and lowest number of ips's served by any 
861                    valid node which can serve this ip.
862                 */
863                 maxnode = -1;
864                 minnode = -1;
865                 for (i=0;i<nodemap->num;i++) {
866                         if (nodemap->nodes[i].flags & mask) {
867                                 continue;
868                         }
869
870                         /* only check nodes that can actually serve this ip */
871                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
872                                 /* no it couldnt   so skip to the next node */
873                                 continue;
874                         }
875
876                         num = node_ip_coverage(ctdb, i, all_ips);
877                         if (maxnode == -1) {
878                                 maxnode = i;
879                                 maxnum  = num;
880                         } else {
881                                 if (num > maxnum) {
882                                         maxnode = i;
883                                         maxnum  = num;
884                                 }
885                         }
886                         if (minnode == -1) {
887                                 minnode = i;
888                                 minnum  = num;
889                         } else {
890                                 if (num < minnum) {
891                                         minnode = i;
892                                         minnum  = num;
893                                 }
894                         }
895                 }
896                 if (maxnode == -1) {
897                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
898                                 ctdb_addr_to_str(&tmp_ip->addr)));
899
900                         continue;
901                 }
902
903                 /* If we want deterministic IPs then dont try to reallocate 
904                    them to spread out the load.
905                 */
906                 if (1 == ctdb->tunable.deterministic_public_ips) {
907                         continue;
908                 }
909
910                 /* if the spread between the smallest and largest coverage by
911                    a node is >=2 we steal one of the ips from the node with
912                    most coverage to even things out a bit.
913                    try to do this at most 5 times  since we dont want to spend
914                    too much time balancing the ip coverage.
915                 */
916                 if ( (maxnum > minnum+1)
917                   && (retries < 5) ){
918                         struct ctdb_public_ip_list *tmp;
919
920                         /* mark one of maxnode's vnn's as unassigned and try
921                            again
922                         */
923                         for (tmp=all_ips;tmp;tmp=tmp->next) {
924                                 if (tmp->pnn == maxnode) {
925                                         tmp->pnn = -1;
926                                         retries++;
927                                         goto try_again;
928                                 }
929                         }
930                 }
931         }
932
933
934         /* finished distributing the public addresses, now just send the 
935            info out to the nodes
936         */
937 finished:
938
939         /* at this point ->pnn is the node which will own each IP
940            or -1 if there is no node that can cover this ip
941         */
942
943         /* now tell all nodes to delete any alias that they should not
944            have.  This will be a NOOP on nodes that don't currently
945            hold the given alias */
946         async_data = talloc_zero(tmp_ctx, struct client_async_data);
947         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
948
949         for (i=0;i<nodemap->num;i++) {
950                 /* don't talk to unconnected nodes, but do talk to banned nodes */
951                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
952                         continue;
953                 }
954
955                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
956                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
957                                 /* This node should be serving this
958                                    vnn so dont tell it to release the ip
959                                 */
960                                 continue;
961                         }
962                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
963                                 ipv4.pnn = tmp_ip->pnn;
964                                 ipv4.sin = tmp_ip->addr.ip;
965
966                                 timeout = TAKEOVER_TIMEOUT();
967                                 data.dsize = sizeof(ipv4);
968                                 data.dptr  = (uint8_t *)&ipv4;
969                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
970                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
971                                                 data, async_data,
972                                                 &timeout, NULL);
973                         } else {
974                                 ip.pnn  = tmp_ip->pnn;
975                                 ip.addr = tmp_ip->addr;
976
977                                 timeout = TAKEOVER_TIMEOUT();
978                                 data.dsize = sizeof(ip);
979                                 data.dptr  = (uint8_t *)&ip;
980                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
981                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
982                                                 data, async_data,
983                                                 &timeout, NULL);
984                         }
985
986                         if (state == NULL) {
987                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
988                                 talloc_free(tmp_ctx);
989                                 return -1;
990                         }
991                 
992                         ctdb_client_async_add(async_data, state);
993                 }
994         }
995         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
996                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
997                 talloc_free(tmp_ctx);
998                 return -1;
999         }
1000         talloc_free(async_data);
1001
1002
1003         /* tell all nodes to get their own IPs */
1004         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1005         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1006         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1007                 if (tmp_ip->pnn == -1) {
1008                         /* this IP won't be taken over */
1009                         continue;
1010                 }
1011
1012                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1013                         ipv4.pnn = tmp_ip->pnn;
1014                         ipv4.sin = tmp_ip->addr.ip;
1015
1016                         timeout = TAKEOVER_TIMEOUT();
1017                         data.dsize = sizeof(ipv4);
1018                         data.dptr  = (uint8_t *)&ipv4;
1019                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1020                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1021                                         data, async_data,
1022                                         &timeout, NULL);
1023                 } else {
1024                         ip.pnn  = tmp_ip->pnn;
1025                         ip.addr = tmp_ip->addr;
1026
1027                         timeout = TAKEOVER_TIMEOUT();
1028                         data.dsize = sizeof(ip);
1029                         data.dptr  = (uint8_t *)&ip;
1030                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1031                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1032                                         data, async_data,
1033                                         &timeout, NULL);
1034                 }
1035                 if (state == NULL) {
1036                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1037                         talloc_free(tmp_ctx);
1038                         return -1;
1039                 }
1040                 
1041                 ctdb_client_async_add(async_data, state);
1042         }
1043         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1044                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1045                 talloc_free(tmp_ctx);
1046                 return -1;
1047         }
1048
1049         talloc_free(tmp_ctx);
1050         return 0;
1051 }
1052
1053
1054 /*
1055   destroy a ctdb_client_ip structure
1056  */
1057 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1058 {
1059         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1060                 ctdb_addr_to_str(&ip->addr),
1061                 ntohs(ip->addr.ip.sin_port),
1062                 ip->client_id));
1063
1064         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1065         return 0;
1066 }
1067
1068 /*
1069   called by a client to inform us of a TCP connection that it is managing
1070   that should tickled with an ACK when IP takeover is done
1071   we handle both the old ipv4 style of packets as well as the new ipv4/6
1072   pdus.
1073  */
1074 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1075                                 TDB_DATA indata)
1076 {
1077         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1078         struct ctdb_control_tcp *old_addr = NULL;
1079         struct ctdb_control_tcp_addr new_addr;
1080         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1081         struct ctdb_tcp_list *tcp;
1082         struct ctdb_control_tcp_vnn t;
1083         int ret;
1084         TDB_DATA data;
1085         struct ctdb_client_ip *ip;
1086         struct ctdb_vnn *vnn;
1087         ctdb_sock_addr addr;
1088
1089         switch (indata.dsize) {
1090         case sizeof(struct ctdb_control_tcp):
1091                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1092                 ZERO_STRUCT(new_addr);
1093                 tcp_sock = &new_addr;
1094                 tcp_sock->src.ip  = old_addr->src;
1095                 tcp_sock->dest.ip = old_addr->dest;
1096                 break;
1097         case sizeof(struct ctdb_control_tcp_addr):
1098                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1099                 break;
1100         default:
1101                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1102                                  "to ctdb_control_tcp_client. size was %d but "
1103                                  "only allowed sizes are %lu and %lu\n",
1104                                  (int)indata.dsize,
1105                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1106                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1107                 return -1;
1108         }
1109
1110         addr = tcp_sock->src;
1111         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1112         addr = tcp_sock->dest;
1113         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1114
1115         ZERO_STRUCT(addr);
1116         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1117         vnn = find_public_ip_vnn(ctdb, &addr);
1118         if (vnn == NULL) {
1119                 switch (addr.sa.sa_family) {
1120                 case AF_INET:
1121                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1122                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1123                                         ctdb_addr_to_str(&addr)));
1124                         }
1125                         break;
1126                 case AF_INET6:
1127                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1128                                 ctdb_addr_to_str(&addr)));
1129                         break;
1130                 default:
1131                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1132                 }
1133
1134                 return 0;
1135         }
1136
1137         if (vnn->pnn != ctdb->pnn) {
1138                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1139                         ctdb_addr_to_str(&addr),
1140                         client_id, client->pid));
1141                 /* failing this call will tell smbd to die */
1142                 return -1;
1143         }
1144
1145         ip = talloc(client, struct ctdb_client_ip);
1146         CTDB_NO_MEMORY(ctdb, ip);
1147
1148         ip->ctdb      = ctdb;
1149         ip->addr      = addr;
1150         ip->client_id = client_id;
1151         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1152         DLIST_ADD(ctdb->client_ip_list, ip);
1153
1154         tcp = talloc(client, struct ctdb_tcp_list);
1155         CTDB_NO_MEMORY(ctdb, tcp);
1156
1157         tcp->connection.src_addr = tcp_sock->src;
1158         tcp->connection.dst_addr = tcp_sock->dest;
1159
1160         DLIST_ADD(client->tcp_list, tcp);
1161
1162         t.src  = tcp_sock->src;
1163         t.dest = tcp_sock->dest;
1164
1165         data.dptr = (uint8_t *)&t;
1166         data.dsize = sizeof(t);
1167
1168         switch (addr.sa.sa_family) {
1169         case AF_INET:
1170                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1171                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1172                         ctdb_addr_to_str(&tcp_sock->src),
1173                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1174                 break;
1175         case AF_INET6:
1176                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1177                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1178                         ctdb_addr_to_str(&tcp_sock->src),
1179                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1180                 break;
1181         default:
1182                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1183         }
1184
1185
1186         /* tell all nodes about this tcp connection */
1187         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1188                                        CTDB_CONTROL_TCP_ADD,
1189                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1190         if (ret != 0) {
1191                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1192                 return -1;
1193         }
1194
1195         return 0;
1196 }
1197
1198 /*
1199   find a tcp address on a list
1200  */
1201 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1202                                            struct ctdb_tcp_connection *tcp)
1203 {
1204         int i;
1205
1206         if (array == NULL) {
1207                 return NULL;
1208         }
1209
1210         for (i=0;i<array->num;i++) {
1211                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1212                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1213                         return &array->connections[i];
1214                 }
1215         }
1216         return NULL;
1217 }
1218
1219 /*
1220   called by a daemon to inform us of a TCP connection that one of its
1221   clients managing that should tickled with an ACK when IP takeover is
1222   done
1223  */
1224 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1225 {
1226         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1227         struct ctdb_tcp_array *tcparray;
1228         struct ctdb_tcp_connection tcp;
1229         struct ctdb_vnn *vnn;
1230
1231         vnn = find_public_ip_vnn(ctdb, &p->dest);
1232         if (vnn == NULL) {
1233                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1234                         ctdb_addr_to_str(&p->dest)));
1235
1236                 return -1;
1237         }
1238
1239
1240         tcparray = vnn->tcp_array;
1241
1242         /* If this is the first tickle */
1243         if (tcparray == NULL) {
1244                 tcparray = talloc_size(ctdb->nodes, 
1245                         offsetof(struct ctdb_tcp_array, connections) +
1246                         sizeof(struct ctdb_tcp_connection) * 1);
1247                 CTDB_NO_MEMORY(ctdb, tcparray);
1248                 vnn->tcp_array = tcparray;
1249
1250                 tcparray->num = 0;
1251                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1252                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1253
1254                 tcparray->connections[tcparray->num].src_addr = p->src;
1255                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1256                 tcparray->num++;
1257                 return 0;
1258         }
1259
1260
1261         /* Do we already have this tickle ?*/
1262         tcp.src_addr = p->src;
1263         tcp.dst_addr = p->dest;
1264         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1265                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1266                         ctdb_addr_to_str(&tcp.dst_addr),
1267                         ntohs(tcp.dst_addr.ip.sin_port),
1268                         vnn->pnn));
1269                 return 0;
1270         }
1271
1272         /* A new tickle, we must add it to the array */
1273         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1274                                         struct ctdb_tcp_connection,
1275                                         tcparray->num+1);
1276         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1277
1278         vnn->tcp_array = tcparray;
1279         tcparray->connections[tcparray->num].src_addr = p->src;
1280         tcparray->connections[tcparray->num].dst_addr = p->dest;
1281         tcparray->num++;
1282                                 
1283         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1284                 ctdb_addr_to_str(&tcp.dst_addr),
1285                 ntohs(tcp.dst_addr.ip.sin_port),
1286                 vnn->pnn));
1287
1288         return 0;
1289 }
1290
1291
1292 /*
1293   called by a daemon to inform us of a TCP connection that one of its
1294   clients managing that should tickled with an ACK when IP takeover is
1295   done
1296  */
1297 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1298 {
1299         struct ctdb_tcp_connection *tcpp;
1300         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1301
1302         if (vnn == NULL) {
1303                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1304                         ctdb_addr_to_str(&conn->dst_addr)));
1305                 return;
1306         }
1307
1308         /* if the array is empty we cant remove it
1309            and we dont need to do anything
1310          */
1311         if (vnn->tcp_array == NULL) {
1312                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1313                         ctdb_addr_to_str(&conn->dst_addr),
1314                         ntohs(conn->dst_addr.ip.sin_port)));
1315                 return;
1316         }
1317
1318
1319         /* See if we know this connection
1320            if we dont know this connection  then we dont need to do anything
1321          */
1322         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1323         if (tcpp == NULL) {
1324                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1325                         ctdb_addr_to_str(&conn->dst_addr),
1326                         ntohs(conn->dst_addr.ip.sin_port)));
1327                 return;
1328         }
1329
1330
1331         /* We need to remove this entry from the array.
1332            Instead of allocating a new array and copying data to it
1333            we cheat and just copy the last entry in the existing array
1334            to the entry that is to be removed and just shring the 
1335            ->num field
1336          */
1337         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1338         vnn->tcp_array->num--;
1339
1340         /* If we deleted the last entry we also need to remove the entire array
1341          */
1342         if (vnn->tcp_array->num == 0) {
1343                 talloc_free(vnn->tcp_array);
1344                 vnn->tcp_array = NULL;
1345         }               
1346
1347         vnn->tcp_update_needed = true;
1348
1349         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1350                 ctdb_addr_to_str(&conn->src_addr),
1351                 ntohs(conn->src_addr.ip.sin_port)));
1352 }
1353
1354
/*
  called when a daemon restarts.

  The intention is to send all tickles for all public addresses we are
  serving to the new node immediately.  That is not implemented yet, so
  this is a stub which ignores both arguments and reports success.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	/* XXX here we should send all tickles we are serving to the new node */
	(void)ctdb;
	(void)vnn;

	return 0;
}
1364
1365
1366 /*
1367   called when a client structure goes away - hook to remove
1368   elements from the tcp_list in all daemons
1369  */
1370 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1371 {
1372         while (client->tcp_list) {
1373                 struct ctdb_tcp_list *tcp = client->tcp_list;
1374                 DLIST_REMOVE(client->tcp_list, tcp);
1375                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1376         }
1377 }
1378
1379
1380 /*
1381   release all IPs on shutdown
1382  */
1383 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1384 {
1385         struct ctdb_vnn *vnn;
1386
1387         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1388                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1389                         continue;
1390                 }
1391                 if (vnn->pnn == ctdb->pnn) {
1392                         vnn->pnn = -1;
1393                 }
1394                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1395                                   vnn->iface, 
1396                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1397                                   vnn->public_netmask_bits);
1398                 release_kill_clients(ctdb, &vnn->public_address);
1399         }
1400 }
1401
1402
1403 /*
1404   get list of public IPs
1405  */
1406 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1407                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1408 {
1409         int i, num, len;
1410         struct ctdb_all_public_ips *ips;
1411         struct ctdb_vnn *vnn;
1412
1413         /* count how many public ip structures we have */
1414         num = 0;
1415         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1416                 num++;
1417         }
1418
1419         len = offsetof(struct ctdb_all_public_ips, ips) + 
1420                 num*sizeof(struct ctdb_public_ip);
1421         ips = talloc_zero_size(outdata, len);
1422         CTDB_NO_MEMORY(ctdb, ips);
1423
1424         outdata->dsize = len;
1425         outdata->dptr  = (uint8_t *)ips;
1426
1427         ips->num = num;
1428         i = 0;
1429         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1430                 ips->ips[i].pnn  = vnn->pnn;
1431                 ips->ips[i].addr = vnn->public_address;
1432                 i++;
1433         }
1434
1435         return 0;
1436 }
1437
1438
1439 /*
1440   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1441  */
1442 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1443                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1444 {
1445         int i, num, len;
1446         struct ctdb_all_public_ipsv4 *ips;
1447         struct ctdb_vnn *vnn;
1448
1449         /* count how many public ip structures we have */
1450         num = 0;
1451         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1452                 if (vnn->public_address.sa.sa_family != AF_INET) {
1453                         continue;
1454                 }
1455                 num++;
1456         }
1457
1458         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1459                 num*sizeof(struct ctdb_public_ipv4);
1460         ips = talloc_zero_size(outdata, len);
1461         CTDB_NO_MEMORY(ctdb, ips);
1462
1463         outdata->dsize = len;
1464         outdata->dptr  = (uint8_t *)ips;
1465
1466         ips->num = num;
1467         i = 0;
1468         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1469                 if (vnn->public_address.sa.sa_family != AF_INET) {
1470                         continue;
1471                 }
1472                 ips->ips[i].pnn = vnn->pnn;
1473                 ips->ips[i].sin = vnn->public_address.ip;
1474                 i++;
1475         }
1476
1477         return 0;
1478 }
1479
1480
1481 /* 
1482    structure containing the listening socket and the list of tcp connections
1483    that the ctdb daemon is to kill
1484 */
1485 struct ctdb_kill_tcp {
1486         struct ctdb_vnn *vnn;
1487         struct ctdb_context *ctdb;
1488         int capture_fd;
1489         struct fd_event *fde;
1490         trbt_tree_t *connections;
1491         void *private_data;
1492 };
1493
1494 /*
1495   a tcp connection that is to be killed
1496  */
1497 struct ctdb_killtcp_con {
1498         ctdb_sock_addr src_addr;
1499         ctdb_sock_addr dst_addr;
1500         int count;
1501         struct ctdb_kill_tcp *killtcp;
1502 };
1503
1504 /* this function is used to create a key to represent this socketpair
1505    in the killtcp tree.
1506    this key is used to insert and lookup matching socketpairs that are
1507    to be tickled and RST
1508 */
1509 #define KILLTCP_KEYLEN  10
1510 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1511 {
1512         static uint32_t key[KILLTCP_KEYLEN];
1513
1514         bzero(key, sizeof(key));
1515
1516         if (src->sa.sa_family != dst->sa.sa_family) {
1517                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1518                 return key;
1519         }
1520         
1521         switch (src->sa.sa_family) {
1522         case AF_INET:
1523                 key[0]  = dst->ip.sin_addr.s_addr;
1524                 key[1]  = src->ip.sin_addr.s_addr;
1525                 key[2]  = dst->ip.sin_port;
1526                 key[3]  = src->ip.sin_port;
1527                 break;
1528         case AF_INET6:
1529                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1530                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1531                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1532                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1533                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1534                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1535                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1536                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1537                 key[8]  = dst->ip6.sin6_port;
1538                 key[9]  = src->ip6.sin6_port;
1539                 break;
1540         default:
1541                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1542                 return key;
1543         }
1544
1545         return key;
1546 }
1547
1548 /*
1549   called when we get a read event on the raw socket
1550  */
1551 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1552                                 uint16_t flags, void *private_data)
1553 {
1554         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1555         struct ctdb_killtcp_con *con;
1556         ctdb_sock_addr src, dst;
1557         uint32_t ack_seq, seq;
1558
1559         if (!(flags & EVENT_FD_READ)) {
1560                 return;
1561         }
1562
1563         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1564                                 killtcp->private_data,
1565                                 &src, &dst,
1566                                 &ack_seq, &seq) != 0) {
1567                 /* probably a non-tcp ACK packet */
1568                 return;
1569         }
1570
1571         /* check if we have this guy in our list of connections
1572            to kill
1573         */
1574         con = trbt_lookuparray32(killtcp->connections, 
1575                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1576         if (con == NULL) {
1577                 /* no this was some other packet we can just ignore */
1578                 return;
1579         }
1580
1581         /* This one has been tickled !
1582            now reset him and remove him from the list.
1583          */
1584         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1585                 ntohs(con->dst_addr.ip.sin_port),
1586                 ctdb_addr_to_str(&con->src_addr),
1587                 ntohs(con->src_addr.ip.sin_port)));
1588
1589         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1590         talloc_free(con);
1591 }
1592
1593
1594 /* when traversing the list of all tcp connections to send tickle acks to
1595    (so that we can capture the ack coming back and kill the connection
1596     by a RST)
1597    this callback is called for each connection we are currently trying to kill
1598 */
1599 static void tickle_connection_traverse(void *param, void *data)
1600 {
1601         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1602
1603         /* have tried too many times, just give up */
1604         if (con->count >= 5) {
1605                 talloc_free(con);
1606                 return;
1607         }
1608
1609         /* othervise, try tickling it again */
1610         con->count++;
1611         ctdb_sys_send_tcp(
1612                 (ctdb_sock_addr *)&con->dst_addr,
1613                 (ctdb_sock_addr *)&con->src_addr,
1614                 0, 0, 0);
1615 }
1616
1617
1618 /* 
1619    called every second until all sentenced connections have been reset
1620  */
1621 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1622                                               struct timeval t, void *private_data)
1623 {
1624         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1625
1626
1627         /* loop over all connections sending tickle ACKs */
1628         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1629
1630
1631         /* If there are no more connections to kill we can remove the
1632            entire killtcp structure
1633          */
1634         if ( (killtcp->connections == NULL) || 
1635              (killtcp->connections->root == NULL) ) {
1636                 talloc_free(killtcp);
1637                 return;
1638         }
1639
1640         /* try tickling them again in a seconds time
1641          */
1642         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1643                         ctdb_tickle_sentenced_connections, killtcp);
1644 }
1645
1646 /*
1647   destroy the killtcp structure
1648  */
1649 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1650 {
1651         killtcp->vnn->killtcp = NULL;
1652         return 0;
1653 }
1654
1655
/* Insert callback for the connection tree: nothing fancy, it
   unconditionally replaces any existing connection structure with the
   new one by returning the new entry (parm) and ignoring the old (data).

   The old entry is deliberately not freed here; per the original note it
   is talloc_stolen by the same tree node and goes away when the new data
   is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        (void)data;

        return replacement;
}
1667
1668 /*
1669   add a tcp socket to the list of connections we want to RST
1670  */
1671 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1672                                        ctdb_sock_addr *s,
1673                                        ctdb_sock_addr *d)
1674 {
1675         ctdb_sock_addr src, dst;
1676         struct ctdb_kill_tcp *killtcp;
1677         struct ctdb_killtcp_con *con;
1678         struct ctdb_vnn *vnn;
1679
1680         ctdb_canonicalize_ip(s, &src);
1681         ctdb_canonicalize_ip(d, &dst);
1682
1683         vnn = find_public_ip_vnn(ctdb, &dst);
1684         if (vnn == NULL) {
1685                 vnn = find_public_ip_vnn(ctdb, &src);
1686         }
1687         if (vnn == NULL) {
1688                 /* if it is not a public ip   it could be our 'single ip' */
1689                 if (ctdb->single_ip_vnn) {
1690                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1691                                 vnn = ctdb->single_ip_vnn;
1692                         }
1693                 }
1694         }
1695         if (vnn == NULL) {
1696                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1697                 return -1;
1698         }
1699
1700         killtcp = vnn->killtcp;
1701         
1702         /* If this is the first connection to kill we must allocate
1703            a new structure
1704          */
1705         if (killtcp == NULL) {
1706                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1707                 CTDB_NO_MEMORY(ctdb, killtcp);
1708
1709                 killtcp->vnn         = vnn;
1710                 killtcp->ctdb        = ctdb;
1711                 killtcp->capture_fd  = -1;
1712                 killtcp->connections = trbt_create(killtcp, 0);
1713
1714                 vnn->killtcp         = killtcp;
1715                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1716         }
1717
1718
1719
1720         /* create a structure that describes this connection we want to
1721            RST and store it in killtcp->connections
1722         */
1723         con = talloc(killtcp, struct ctdb_killtcp_con);
1724         CTDB_NO_MEMORY(ctdb, con);
1725         con->src_addr = src;
1726         con->dst_addr = dst;
1727         con->count    = 0;
1728         con->killtcp  = killtcp;
1729
1730
1731         trbt_insertarray32_callback(killtcp->connections,
1732                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1733                         add_killtcp_callback, con);
1734
1735         /* 
1736            If we dont have a socket to listen on yet we must create it
1737          */
1738         if (killtcp->capture_fd == -1) {
1739                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1740                 if (killtcp->capture_fd == -1) {
1741                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1742                         goto failed;
1743                 }
1744         }
1745
1746
1747         if (killtcp->fde == NULL) {
1748                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1749                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1750                                             capture_tcp_handler, killtcp);
1751
1752                 /* We also need to set up some events to tickle all these connections
1753                    until they are all reset
1754                 */
1755                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1756                                 ctdb_tickle_sentenced_connections, killtcp);
1757         }
1758
1759         /* tickle him once now */
1760         ctdb_sys_send_tcp(
1761                 &con->dst_addr,
1762                 &con->src_addr,
1763                 0, 0, 0);
1764
1765         return 0;
1766
1767 failed:
1768         talloc_free(vnn->killtcp);
1769         vnn->killtcp = NULL;
1770         return -1;
1771 }
1772
1773 /*
1774   kill a TCP connection.
1775  */
1776 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1777 {
1778         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1779
1780         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1781 }
1782
1783 /*
1784   called by a daemon to inform us of the entire list of TCP tickles for
1785   a particular public address.
1786   this control should only be sent by the node that is currently serving
1787   that public address.
1788  */
1789 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1790 {
1791         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1792         struct ctdb_tcp_array *tcparray;
1793         struct ctdb_vnn *vnn;
1794
1795         /* We must at least have tickles.num or else we cant verify the size
1796            of the received data blob
1797          */
1798         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1799                                         tickles.connections)) {
1800                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1801                 return -1;
1802         }
1803
1804         /* verify that the size of data matches what we expect */
1805         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1806                                 tickles.connections)
1807                          + sizeof(struct ctdb_tcp_connection)
1808                                  * list->tickles.num) {
1809                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1810                 return -1;
1811         }       
1812
1813         vnn = find_public_ip_vnn(ctdb, &list->addr);
1814         if (vnn == NULL) {
1815                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1816                         ctdb_addr_to_str(&list->addr)));
1817
1818                 return 1;
1819         }
1820
1821         /* remove any old ticklelist we might have */
1822         talloc_free(vnn->tcp_array);
1823         vnn->tcp_array = NULL;
1824
1825         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1826         CTDB_NO_MEMORY(ctdb, tcparray);
1827
1828         tcparray->num = list->tickles.num;
1829
1830         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1831         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1832
1833         memcpy(tcparray->connections, &list->tickles.connections[0], 
1834                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1835
1836         /* We now have a new fresh tickle list array for this vnn */
1837         vnn->tcp_array = talloc_steal(vnn, tcparray);
1838         
1839         return 0;
1840 }
1841
1842 /*
1843   called to return the full list of tickles for the puclic address associated 
1844   with the provided vnn
1845  */
1846 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1847 {
1848         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1849         struct ctdb_control_tcp_tickle_list *list;
1850         struct ctdb_tcp_array *tcparray;
1851         int num;
1852         struct ctdb_vnn *vnn;
1853
1854         vnn = find_public_ip_vnn(ctdb, addr);
1855         if (vnn == NULL) {
1856                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1857                         ctdb_addr_to_str(addr)));
1858
1859                 return 1;
1860         }
1861
1862         tcparray = vnn->tcp_array;
1863         if (tcparray) {
1864                 num = tcparray->num;
1865         } else {
1866                 num = 0;
1867         }
1868
1869         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1870                                 tickles.connections)
1871                         + sizeof(struct ctdb_tcp_connection) * num;
1872
1873         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1874         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1875         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1876
1877         list->addr = *addr;
1878         list->tickles.num = num;
1879         if (num) {
1880                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1881                         sizeof(struct ctdb_tcp_connection) * num);
1882         }
1883
1884         return 0;
1885 }
1886
1887
1888 /*
1889   set the list of all tcp tickles for a public address
1890  */
1891 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1892                               struct timeval timeout, uint32_t destnode, 
1893                               ctdb_sock_addr *addr,
1894                               struct ctdb_tcp_array *tcparray)
1895 {
1896         int ret, num;
1897         TDB_DATA data;
1898         struct ctdb_control_tcp_tickle_list *list;
1899
1900         if (tcparray) {
1901                 num = tcparray->num;
1902         } else {
1903                 num = 0;
1904         }
1905
1906         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1907                                 tickles.connections) +
1908                         sizeof(struct ctdb_tcp_connection) * num;
1909         data.dptr = talloc_size(ctdb, data.dsize);
1910         CTDB_NO_MEMORY(ctdb, data.dptr);
1911
1912         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1913         list->addr = *addr;
1914         list->tickles.num = num;
1915         if (tcparray) {
1916                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1917         }
1918
1919         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1920                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1921                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1922         if (ret != 0) {
1923                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1924                 return -1;
1925         }
1926
1927         talloc_free(data.dptr);
1928
1929         return ret;
1930 }
1931
1932
1933 /*
1934   perform tickle updates if required
1935  */
1936 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1937                                 struct timed_event *te, 
1938                                 struct timeval t, void *private_data)
1939 {
1940         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1941         int ret;
1942         struct ctdb_vnn *vnn;
1943
1944         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1945                 /* we only send out updates for public addresses that 
1946                    we have taken over
1947                  */
1948                 if (ctdb->pnn != vnn->pnn) {
1949                         continue;
1950                 }
1951                 /* We only send out the updates if we need to */
1952                 if (!vnn->tcp_update_needed) {
1953                         continue;
1954                 }
1955                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1956                                 TAKEOVER_TIMEOUT(),
1957                                 CTDB_BROADCAST_CONNECTED,
1958                                 &vnn->public_address,
1959                                 vnn->tcp_array);
1960                 if (ret != 0) {
1961                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1962                                 ctdb_addr_to_str(&vnn->public_address)));
1963                 }
1964         }
1965
1966         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1967                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1968                              ctdb_update_tcp_tickles, ctdb);
1969 }               
1970         
1971
1972 /*
1973   start periodic update of tcp tickles
1974  */
1975 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1976 {
1977         ctdb->tickle_update_context = talloc_new(ctdb);
1978
1979         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1980                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1981                              ctdb_update_tcp_tickles, ctdb);
1982 }
1983
1984
1985
1986
1987 struct control_gratious_arp {
1988         struct ctdb_context *ctdb;
1989         ctdb_sock_addr addr;
1990         const char *iface;
1991         int count;
1992 };
1993
1994 /*
1995   send a control_gratuitous arp
1996  */
1997 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1998                                   struct timeval t, void *private_data)
1999 {
2000         int ret;
2001         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2002                                                         struct control_gratious_arp);
2003
2004         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2005         if (ret != 0) {
2006                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2007         }
2008
2009
2010         arp->count++;
2011         if (arp->count == CTDB_ARP_REPEAT) {
2012                 talloc_free(arp);
2013                 return;
2014         }
2015
2016         event_add_timed(arp->ctdb->ev, arp, 
2017                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2018                         send_gratious_arp, arp);
2019 }
2020
2021
2022 /*
2023   send a gratious arp 
2024  */
2025 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2026 {
2027         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2028         struct control_gratious_arp *arp;
2029
2030         /* verify the size of indata */
2031         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2032                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2033                                  (unsigned)indata.dsize, 
2034                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2035                 return -1;
2036         }
2037         if (indata.dsize != 
2038                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2039                 + gratious_arp->len ) ){
2040
2041                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2042                         "but should be %u bytes\n", 
2043                          (unsigned)indata.dsize, 
2044                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2045                 return -1;
2046         }
2047
2048
2049         arp = talloc(ctdb, struct control_gratious_arp);
2050         CTDB_NO_MEMORY(ctdb, arp);
2051
2052         arp->ctdb  = ctdb;
2053         arp->addr   = gratious_arp->addr;
2054         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2055         CTDB_NO_MEMORY(ctdb, arp->iface);
2056         arp->count = 0;
2057         
2058         event_add_timed(arp->ctdb->ev, arp, 
2059                         timeval_zero(), send_gratious_arp, arp);
2060
2061         return 0;
2062 }
2063
2064 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2065 {
2066         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2067         int ret;
2068
2069         /* verify the size of indata */
2070         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2071                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2072                 return -1;
2073         }
2074         if (indata.dsize != 
2075                 ( offsetof(struct ctdb_control_ip_iface, iface)
2076                 + pub->len ) ){
2077
2078                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2079                         "but should be %u bytes\n", 
2080                          (unsigned)indata.dsize, 
2081                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2082                 return -1;
2083         }
2084
2085         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2086
2087         if (ret != 0) {
2088                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2089                 return -1;
2090         }
2091
2092         return 0;
2093 }
2094
2095 /*
2096   called when releaseip event finishes for del_public_address
2097  */
2098 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2099                                 void *private_data)
2100 {
2101         talloc_free(private_data);
2102 }
2103
2104 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2105 {
2106         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2107         struct ctdb_vnn *vnn;
2108         int ret;
2109
2110         /* verify the size of indata */
2111         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2112                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2113                 return -1;
2114         }
2115         if (indata.dsize != 
2116                 ( offsetof(struct ctdb_control_ip_iface, iface)
2117                 + pub->len ) ){
2118
2119                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2120                         "but should be %u bytes\n", 
2121                          (unsigned)indata.dsize, 
2122                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2123                 return -1;
2124         }
2125
2126         /* walk over all public addresses until we find a match */
2127         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2128                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2129                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2130
2131                         DLIST_REMOVE(ctdb->vnn, vnn);
2132
2133                         ret = ctdb_event_script_callback(ctdb, 
2134                                          mem_ctx, delete_ip_callback, mem_ctx,
2135                                          false,
2136                                          CTDB_EVENT_RELEASE_IP,
2137                                          "%s %s %u",
2138                                          vnn->iface, 
2139                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2140                                          vnn->public_netmask_bits);
2141                         talloc_free(vnn);
2142                         if (ret != 0) {
2143                                 return -1;
2144                         }
2145                         return 0;
2146                 }
2147         }
2148
2149         return -1;
2150 }
2151