Merge branch 'status-test-2'
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Timeout value derived from the takeover_timeout tunable by way of
   timeval_current_ofs().  The expansion refers to a variable named
   "ctdb", which must be in scope wherever the macro is used. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* CTDB_ARP_INTERVAL is the first argument given to
   timeval_current_ofs() when the gratuitous arp timer is re-armed.
   CTDB_ARP_REPEAT is the number of arp rounds after which
   ctdb_control_send_arp() stops re-arming and frees its state. */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(state->vnn);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(vnn, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          state, takeover_ip_callback, state,
239                                          false,
240                                          CTDB_EVENT_TAKE_IP,
241                                          "%s %s %u",
242                                          vnn->iface, 
243                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
244                                          vnn->public_netmask_bits);
245
246         if (ret != 0) {
247                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
248                         ctdb_addr_to_str(&pip->addr),
249                         vnn->iface));
250                 talloc_free(state);
251                 return -1;
252         }
253
254         /* tell ctdb_control.c that we will be replying asynchronously */
255         *async_reply = true;
256
257         return 0;
258 }
259
260 /*
261   takeover an ip address old v4 style
262  */
263 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
264                                 struct ctdb_req_control *c,
265                                 TDB_DATA indata, 
266                                 bool *async_reply)
267 {
268         TDB_DATA data;
269         
270         data.dsize = sizeof(struct ctdb_public_ip);
271         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
272         CTDB_NO_MEMORY(ctdb, data.dptr);
273         
274         memcpy(data.dptr, indata.dptr, indata.dsize);
275         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
276 }
277
278 /*
279   kill any clients that are registered with a IP that is being released
280  */
281 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
282 {
283         struct ctdb_client_ip *ip;
284
285         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
286                 ctdb_addr_to_str(addr)));
287
288         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
289                 ctdb_sock_addr tmp_addr;
290
291                 tmp_addr = ip->addr;
292                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
293                         ip->client_id,
294                         ctdb_addr_to_str(&ip->addr)));
295
296                 if (ctdb_same_ip(&tmp_addr, addr)) {
297                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
298                                                                      ip->client_id, 
299                                                                      struct ctdb_client);
300                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
301                                 ip->client_id,
302                                 ctdb_addr_to_str(&ip->addr),
303                                 client->pid));
304
305                         if (client->pid != 0) {
306                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
307                                         (unsigned)client->pid,
308                                         ctdb_addr_to_str(addr),
309                                         ip->client_id));
310                                 kill(client->pid, SIGKILL);
311                         }
312                 }
313         }
314 }
315
316 /*
317   called when releaseip event finishes
318  */
319 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
320                                 void *private_data)
321 {
322         struct takeover_callback_state *state = 
323                 talloc_get_type(private_data, struct takeover_callback_state);
324         TDB_DATA data;
325
326         /* send a message to all clients of this node telling them
327            that the cluster has been reconfigured and they should
328            release any sockets on this IP */
329         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
330         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
331         data.dsize = strlen((char *)data.dptr)+1;
332
333         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
334
335         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
336
337         /* kill clients that have registered with this IP */
338         release_kill_clients(ctdb, state->addr);
339         
340         /* the control succeeded */
341         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
342         talloc_free(state);
343 }
344
345 /*
346   release an ip address
347  */
348 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
349                                 struct ctdb_req_control *c,
350                                 TDB_DATA indata, 
351                                 bool *async_reply)
352 {
353         int ret;
354         struct takeover_callback_state *state;
355         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
356         struct ctdb_vnn *vnn;
357
358         /* update our vnn list */
359         vnn = find_public_ip_vnn(ctdb, &pip->addr);
360         if (vnn == NULL) {
361                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
362                         ctdb_addr_to_str(&pip->addr)));
363                 return 0;
364         }
365         vnn->pnn = pip->pnn;
366
367         /* stop any previous arps */
368         talloc_free(vnn->takeover_ctx);
369         vnn->takeover_ctx = NULL;
370
371         if (!ctdb_sys_have_ip(&pip->addr)) {
372                 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
373                         ctdb_addr_to_str(&pip->addr),
374                         vnn->public_netmask_bits, 
375                         vnn->iface));
376                 return 0;
377         }
378
379         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
380                 ctdb_addr_to_str(&pip->addr),
381                 vnn->public_netmask_bits, 
382                 vnn->iface,
383                 pip->pnn));
384
385         state = talloc(ctdb, struct takeover_callback_state);
386         CTDB_NO_MEMORY(ctdb, state);
387
388         state->c = talloc_steal(state, c);
389         state->addr = talloc(state, ctdb_sock_addr);       
390         CTDB_NO_MEMORY(ctdb, state->addr);
391         *state->addr = pip->addr;
392         state->vnn   = vnn;
393
394         ret = ctdb_event_script_callback(ctdb, 
395                                          state, release_ip_callback, state,
396                                          false,
397                                          CTDB_EVENT_RELEASE_IP,
398                                          "%s %s %u",
399                                          vnn->iface, 
400                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
401                                          vnn->public_netmask_bits);
402         if (ret != 0) {
403                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
404                         ctdb_addr_to_str(&pip->addr),
405                         vnn->iface));
406                 talloc_free(state);
407                 return -1;
408         }
409
410         /* tell the control that we will be reply asynchronously */
411         *async_reply = true;
412         return 0;
413 }
414
415 /*
416   release an ip address old v4 style
417  */
418 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
419                                 struct ctdb_req_control *c,
420                                 TDB_DATA indata, 
421                                 bool *async_reply)
422 {
423         TDB_DATA data;
424         
425         data.dsize = sizeof(struct ctdb_public_ip);
426         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
427         CTDB_NO_MEMORY(ctdb, data.dptr);
428         
429         memcpy(data.dptr, indata.dptr, indata.dsize);
430         return ctdb_control_release_ip(ctdb, c, data, async_reply);
431 }
432
433
434 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
435 {
436         struct ctdb_vnn      *vnn;
437
438         /* Verify that we dont have an entry for this ip yet */
439         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
440                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
441                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
442                                 ctdb_addr_to_str(addr)));
443                         return -1;
444                 }               
445         }
446
447         /* create a new vnn structure for this ip address */
448         vnn = talloc_zero(ctdb, struct ctdb_vnn);
449         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
450         vnn->iface = talloc_strdup(vnn, iface);
451         CTDB_NO_MEMORY(ctdb, vnn->iface);
452         vnn->public_address      = *addr;
453         vnn->public_netmask_bits = mask;
454         vnn->pnn                 = -1;
455         
456         DLIST_ADD(ctdb->vnn, vnn);
457
458         return 0;
459 }
460
461
462 /*
463   setup the event script directory
464 */
465 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
466 {
467         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
468         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
469         return 0;
470 }
471
472 /*
473   setup the public address lists from a file
474 */
475 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
476 {
477         char **lines;
478         int nlines;
479         int i;
480
481         lines = file_lines_load(alist, &nlines, ctdb);
482         if (lines == NULL) {
483                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
484                 return -1;
485         }
486         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
487                 nlines--;
488         }
489
490         for (i=0;i<nlines;i++) {
491                 unsigned mask;
492                 ctdb_sock_addr addr;
493                 const char *addrstr;
494                 const char *iface;
495                 char *tok, *line;
496
497                 line = lines[i];
498                 while ((*line == ' ') || (*line == '\t')) {
499                         line++;
500                 }
501                 if (*line == '#') {
502                         continue;
503                 }
504                 if (strcmp(line, "") == 0) {
505                         continue;
506                 }
507                 tok = strtok(line, " \t");
508                 addrstr = tok;
509                 tok = strtok(NULL, " \t");
510                 if (tok == NULL) {
511                         if (NULL == ctdb->default_public_interface) {
512                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
513                                          i+1));
514                                 talloc_free(lines);
515                                 return -1;
516                         }
517                         iface = ctdb->default_public_interface;
518                 } else {
519                         iface = tok;
520                 }
521
522                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
523                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
524                         talloc_free(lines);
525                         return -1;
526                 }
527                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
528                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
529                         talloc_free(lines);
530                         return -1;
531                 }
532         }
533
534         talloc_free(lines);
535         return 0;
536 }
537
538
539
540
541 struct ctdb_public_ip_list {
542         struct ctdb_public_ip_list *next;
543         uint32_t pnn;
544         ctdb_sock_addr addr;
545 };
546
547
548 /* Given a physical node, return the number of
549    public addresses that is currently assigned to this node.
550 */
551 static int node_ip_coverage(struct ctdb_context *ctdb, 
552         int32_t pnn,
553         struct ctdb_public_ip_list *ips)
554 {
555         int num=0;
556
557         for (;ips;ips=ips->next) {
558                 if (ips->pnn == pnn) {
559                         num++;
560                 }
561         }
562         return num;
563 }
564
565
566 /* Check if this is a public ip known to the node, i.e. can that
567    node takeover this ip ?
568 */
569 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
570                 struct ctdb_public_ip_list *ip)
571 {
572         struct ctdb_all_public_ips *public_ips;
573         int i;
574
575         public_ips = ctdb->nodes[pnn]->public_ips;
576
577         if (public_ips == NULL) {
578                 return -1;
579         }
580
581         for (i=0;i<public_ips->num;i++) {
582                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
583                         /* yes, this node can serve this public ip */
584                         return 0;
585                 }
586         }
587
588         return -1;
589 }
590
591
592 /* search the node lists list for a node to takeover this ip.
593    pick the node that currently are serving the least number of ips
594    so that the ips get spread out evenly.
595 */
596 static int find_takeover_node(struct ctdb_context *ctdb, 
597                 struct ctdb_node_map *nodemap, uint32_t mask, 
598                 struct ctdb_public_ip_list *ip,
599                 struct ctdb_public_ip_list *all_ips)
600 {
601         int pnn, min=0, num;
602         int i;
603
604         pnn    = -1;
605         for (i=0;i<nodemap->num;i++) {
606                 if (nodemap->nodes[i].flags & mask) {
607                         /* This node is not healty and can not be used to serve
608                            a public address 
609                         */
610                         continue;
611                 }
612
613                 /* verify that this node can serve this ip */
614                 if (can_node_serve_ip(ctdb, i, ip)) {
615                         /* no it couldnt   so skip to the next node */
616                         continue;
617                 }
618
619                 num = node_ip_coverage(ctdb, i, all_ips);
620                 /* was this the first node we checked ? */
621                 if (pnn == -1) {
622                         pnn = i;
623                         min  = num;
624                 } else {
625                         if (num < min) {
626                                 pnn = i;
627                                 min  = num;
628                         }
629                 }
630         }       
631         if (pnn == -1) {
632                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
633                         ctdb_addr_to_str(&ip->addr)));
634
635                 return -1;
636         }
637
638         ip->pnn = pnn;
639         return 0;
640 }
641
642 #define IP_KEYLEN       4
643 static uint32_t *ip_key(ctdb_sock_addr *ip)
644 {
645         static uint32_t key[IP_KEYLEN];
646
647         bzero(key, sizeof(key));
648
649         switch (ip->sa.sa_family) {
650         case AF_INET:
651                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
652                 break;
653         case AF_INET6:
654                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
655                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
656                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
657                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
658                 break;
659         default:
660                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
661                 return key;
662         }
663
664         return key;
665 }
666
/*
  callback given to trbt_insertarray32_callback() by
  create_merged_ip_list(): always returns parm, the second argument
  is ignored
 */
static void *add_ip_callback(void *parm, void *data)
{
	void *result = parm;

	return result;
}
671
672 void getips_count_callback(void *param, void *data)
673 {
674         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
675         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
676
677         new_ip->next = *ip_list;
678         *ip_list     = new_ip;
679 }
680
681 struct ctdb_public_ip_list *
682 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
683 {
684         int i, j;
685         struct ctdb_public_ip_list *ip_list;
686         struct ctdb_all_public_ips *public_ips;
687         trbt_tree_t *ip_tree;
688
689         ip_tree = trbt_create(tmp_ctx, 0);
690
691         for (i=0;i<ctdb->num_nodes;i++) {
692                 public_ips = ctdb->nodes[i]->public_ips;
693
694                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
695                         continue;
696                 }
697
698                 /* there were no public ips for this node */
699                 if (public_ips == NULL) {
700                         continue;
701                 }               
702
703                 for (j=0;j<public_ips->num;j++) {
704                         struct ctdb_public_ip_list *tmp_ip; 
705
706                         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
707                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
708                         tmp_ip->pnn  = public_ips->ips[j].pnn;
709                         tmp_ip->addr = public_ips->ips[j].addr;
710                         tmp_ip->next = NULL;
711
712                         trbt_insertarray32_callback(ip_tree,
713                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
714                                 add_ip_callback,
715                                 tmp_ip);
716                 }
717         }
718
719         ip_list = NULL;
720         trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
721
722         return ip_list;
723 }
724
725 /*
726   make any IP alias changes for public addresses that are necessary 
727  */
728 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
729 {
730         int i, num_healthy, retries;
731         struct ctdb_public_ip ip;
732         struct ctdb_public_ipv4 ipv4;
733         uint32_t mask;
734         struct ctdb_public_ip_list *all_ips, *tmp_ip;
735         int maxnode, maxnum=0, minnode, minnum=0, num;
736         TDB_DATA data;
737         struct timeval timeout;
738         struct client_async_data *async_data;
739         struct ctdb_client_control_state *state;
740         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
741
742
743         ZERO_STRUCT(ip);
744
745         /* Count how many completely healthy nodes we have */
746         num_healthy = 0;
747         for (i=0;i<nodemap->num;i++) {
748                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
749                         num_healthy++;
750                 }
751         }
752
753         if (num_healthy > 0) {
754                 /* We have healthy nodes, so only consider them for 
755                    serving public addresses
756                 */
757                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
758         } else {
759                 /* We didnt have any completely healthy nodes so
760                    use "disabled" nodes as a fallback
761                 */
762                 mask = NODE_FLAGS_INACTIVE;
763         }
764
765         /* since nodes only know about those public addresses that
766            can be served by that particular node, no single node has
767            a full list of all public addresses that exist in the cluster.
768            Walk over all node structures and create a merged list of
769            all public addresses that exist in the cluster.
770         */
771         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
772
773         /* If we want deterministic ip allocations, i.e. that the ip addresses
774            will always be allocated the same way for a specific set of
775            available/unavailable nodes.
776         */
777         if (1 == ctdb->tunable.deterministic_public_ips) {              
778                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
779                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
780                         tmp_ip->pnn = i%nodemap->num;
781                 }
782         }
783
784
785         /* mark all public addresses with a masked node as being served by
786            node -1
787         */
788         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
789                 if (tmp_ip->pnn == -1) {
790                         continue;
791                 }
792                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
793                         tmp_ip->pnn = -1;
794                 }
795         }
796
797         /* verify that the assigned nodes can serve that public ip
798            and set it to -1 if not
799         */
800         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
801                 if (tmp_ip->pnn == -1) {
802                         continue;
803                 }
804                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
805                         /* this node can not serve this ip. */
806                         tmp_ip->pnn = -1;
807                 }
808         }
809
810
811         /* now we must redistribute all public addresses with takeover node
812            -1 among the nodes available
813         */
814         retries = 0;
815 try_again:
816         /* loop over all ip's and find a physical node to cover for 
817            each unassigned ip.
818         */
819         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
820                 if (tmp_ip->pnn == -1) {
821                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
822                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
823                                         ctdb_addr_to_str(&tmp_ip->addr)));
824                         }
825                 }
826         }
827
828         /* If we dont want ips to fail back after a node becomes healthy
829            again, we wont even try to reallocat the ip addresses so that
830            they are evenly spread out.
831            This can NOT be used at the same time as DeterministicIPs !
832         */
833         if (1 == ctdb->tunable.no_ip_failback) {
834                 if (1 == ctdb->tunable.deterministic_public_ips) {
835                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
836                 }
837                 goto finished;
838         }
839
840
841         /* now, try to make sure the ip adresses are evenly distributed
842            across the node.
843            for each ip address, loop over all nodes that can serve this
844            ip and make sure that the difference between the node
845            serving the most and the node serving the least ip's are not greater
846            than 1.
847         */
848         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
849                 if (tmp_ip->pnn == -1) {
850                         continue;
851                 }
852
853                 /* Get the highest and lowest number of ips's served by any 
854                    valid node which can serve this ip.
855                 */
856                 maxnode = -1;
857                 minnode = -1;
858                 for (i=0;i<nodemap->num;i++) {
859                         if (nodemap->nodes[i].flags & mask) {
860                                 continue;
861                         }
862
863                         /* only check nodes that can actually serve this ip */
864                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
865                                 /* no it couldnt   so skip to the next node */
866                                 continue;
867                         }
868
869                         num = node_ip_coverage(ctdb, i, all_ips);
870                         if (maxnode == -1) {
871                                 maxnode = i;
872                                 maxnum  = num;
873                         } else {
874                                 if (num > maxnum) {
875                                         maxnode = i;
876                                         maxnum  = num;
877                                 }
878                         }
879                         if (minnode == -1) {
880                                 minnode = i;
881                                 minnum  = num;
882                         } else {
883                                 if (num < minnum) {
884                                         minnode = i;
885                                         minnum  = num;
886                                 }
887                         }
888                 }
889                 if (maxnode == -1) {
890                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
891                                 ctdb_addr_to_str(&tmp_ip->addr)));
892
893                         continue;
894                 }
895
896                 /* If we want deterministic IPs then dont try to reallocate 
897                    them to spread out the load.
898                 */
899                 if (1 == ctdb->tunable.deterministic_public_ips) {
900                         continue;
901                 }
902
903                 /* if the spread between the smallest and largest coverage by
904                    a node is >=2 we steal one of the ips from the node with
905                    most coverage to even things out a bit.
906                    try to do this at most 5 times  since we dont want to spend
907                    too much time balancing the ip coverage.
908                 */
909                 if ( (maxnum > minnum+1)
910                   && (retries < 5) ){
911                         struct ctdb_public_ip_list *tmp;
912
913                         /* mark one of maxnode's vnn's as unassigned and try
914                            again
915                         */
916                         for (tmp=all_ips;tmp;tmp=tmp->next) {
917                                 if (tmp->pnn == maxnode) {
918                                         tmp->pnn = -1;
919                                         retries++;
920                                         goto try_again;
921                                 }
922                         }
923                 }
924         }
925
926
927         /* finished distributing the public addresses, now just send the 
928            info out to the nodes
929         */
930 finished:
931
932         /* at this point ->pnn is the node which will own each IP
933            or -1 if there is no node that can cover this ip
934         */
935
936         /* now tell all nodes to delete any alias that they should not
937            have.  This will be a NOOP on nodes that don't currently
938            hold the given alias */
939         async_data = talloc_zero(tmp_ctx, struct client_async_data);
940         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
941
942         for (i=0;i<nodemap->num;i++) {
943                 /* don't talk to unconnected nodes, but do talk to banned nodes */
944                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
945                         continue;
946                 }
947
948                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
949                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
950                                 /* This node should be serving this
951                                    vnn so dont tell it to release the ip
952                                 */
953                                 continue;
954                         }
955                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
956                                 ipv4.pnn = tmp_ip->pnn;
957                                 ipv4.sin = tmp_ip->addr.ip;
958
959                                 timeout = TAKEOVER_TIMEOUT();
960                                 data.dsize = sizeof(ipv4);
961                                 data.dptr  = (uint8_t *)&ipv4;
962                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
963                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
964                                                 data, async_data,
965                                                 &timeout, NULL);
966                         } else {
967                                 ip.pnn  = tmp_ip->pnn;
968                                 ip.addr = tmp_ip->addr;
969
970                                 timeout = TAKEOVER_TIMEOUT();
971                                 data.dsize = sizeof(ip);
972                                 data.dptr  = (uint8_t *)&ip;
973                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
974                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
975                                                 data, async_data,
976                                                 &timeout, NULL);
977                         }
978
979                         if (state == NULL) {
980                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
981                                 talloc_free(tmp_ctx);
982                                 return -1;
983                         }
984                 
985                         ctdb_client_async_add(async_data, state);
986                 }
987         }
988         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
989                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
990                 talloc_free(tmp_ctx);
991                 return -1;
992         }
993         talloc_free(async_data);
994
995
996         /* tell all nodes to get their own IPs */
997         async_data = talloc_zero(tmp_ctx, struct client_async_data);
998         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
999         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1000                 if (tmp_ip->pnn == -1) {
1001                         /* this IP won't be taken over */
1002                         continue;
1003                 }
1004
1005                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1006                         ipv4.pnn = tmp_ip->pnn;
1007                         ipv4.sin = tmp_ip->addr.ip;
1008
1009                         timeout = TAKEOVER_TIMEOUT();
1010                         data.dsize = sizeof(ipv4);
1011                         data.dptr  = (uint8_t *)&ipv4;
1012                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1013                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1014                                         data, async_data,
1015                                         &timeout, NULL);
1016                 } else {
1017                         ip.pnn  = tmp_ip->pnn;
1018                         ip.addr = tmp_ip->addr;
1019
1020                         timeout = TAKEOVER_TIMEOUT();
1021                         data.dsize = sizeof(ip);
1022                         data.dptr  = (uint8_t *)&ip;
1023                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1024                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1025                                         data, async_data,
1026                                         &timeout, NULL);
1027                 }
1028                 if (state == NULL) {
1029                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1030                         talloc_free(tmp_ctx);
1031                         return -1;
1032                 }
1033                 
1034                 ctdb_client_async_add(async_data, state);
1035         }
1036         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1037                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1038                 talloc_free(tmp_ctx);
1039                 return -1;
1040         }
1041
1042         talloc_free(tmp_ctx);
1043         return 0;
1044 }
1045
1046
1047 /*
1048   destroy a ctdb_client_ip structure
1049  */
1050 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1051 {
1052         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1053                 ctdb_addr_to_str(&ip->addr),
1054                 ntohs(ip->addr.ip.sin_port),
1055                 ip->client_id));
1056
1057         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1058         return 0;
1059 }
1060
1061 /*
1062   called by a client to inform us of a TCP connection that it is managing
1063   that should tickled with an ACK when IP takeover is done
1064   we handle both the old ipv4 style of packets as well as the new ipv4/6
1065   pdus.
1066  */
1067 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1068                                 TDB_DATA indata)
1069 {
1070         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1071         struct ctdb_control_tcp *old_addr = NULL;
1072         struct ctdb_control_tcp_addr new_addr;
1073         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1074         struct ctdb_tcp_list *tcp;
1075         struct ctdb_control_tcp_vnn t;
1076         int ret;
1077         TDB_DATA data;
1078         struct ctdb_client_ip *ip;
1079         struct ctdb_vnn *vnn;
1080         ctdb_sock_addr addr;
1081
1082         switch (indata.dsize) {
1083         case sizeof(struct ctdb_control_tcp):
1084                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1085                 ZERO_STRUCT(new_addr);
1086                 tcp_sock = &new_addr;
1087                 tcp_sock->src.ip  = old_addr->src;
1088                 tcp_sock->dest.ip = old_addr->dest;
1089                 break;
1090         case sizeof(struct ctdb_control_tcp_addr):
1091                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1092                 break;
1093         default:
1094                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1095                                  "to ctdb_control_tcp_client. size was %d but "
1096                                  "only allowed sizes are %lu and %lu\n",
1097                                  (int)indata.dsize,
1098                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1099                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1100                 return -1;
1101         }
1102
1103         addr = tcp_sock->src;
1104         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1105         addr = tcp_sock->dest;
1106         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1107
1108         ZERO_STRUCT(addr);
1109         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1110         vnn = find_public_ip_vnn(ctdb, &addr);
1111         if (vnn == NULL) {
1112                 switch (addr.sa.sa_family) {
1113                 case AF_INET:
1114                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1115                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1116                                         ctdb_addr_to_str(&addr)));
1117                         }
1118                         break;
1119                 case AF_INET6:
1120                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1121                                 ctdb_addr_to_str(&addr)));
1122                         break;
1123                 default:
1124                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1125                 }
1126
1127                 return 0;
1128         }
1129
1130         if (vnn->pnn != ctdb->pnn) {
1131                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1132                         ctdb_addr_to_str(&addr),
1133                         client_id, client->pid));
1134                 /* failing this call will tell smbd to die */
1135                 return -1;
1136         }
1137
1138         ip = talloc(client, struct ctdb_client_ip);
1139         CTDB_NO_MEMORY(ctdb, ip);
1140
1141         ip->ctdb      = ctdb;
1142         ip->addr      = addr;
1143         ip->client_id = client_id;
1144         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1145         DLIST_ADD(ctdb->client_ip_list, ip);
1146
1147         tcp = talloc(client, struct ctdb_tcp_list);
1148         CTDB_NO_MEMORY(ctdb, tcp);
1149
1150         tcp->connection.src_addr = tcp_sock->src;
1151         tcp->connection.dst_addr = tcp_sock->dest;
1152
1153         DLIST_ADD(client->tcp_list, tcp);
1154
1155         t.src  = tcp_sock->src;
1156         t.dest = tcp_sock->dest;
1157
1158         data.dptr = (uint8_t *)&t;
1159         data.dsize = sizeof(t);
1160
1161         switch (addr.sa.sa_family) {
1162         case AF_INET:
1163                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1164                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1165                         ctdb_addr_to_str(&tcp_sock->src),
1166                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1167                 break;
1168         case AF_INET6:
1169                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1170                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1171                         ctdb_addr_to_str(&tcp_sock->src),
1172                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1173                 break;
1174         default:
1175                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1176         }
1177
1178
1179         /* tell all nodes about this tcp connection */
1180         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1181                                        CTDB_CONTROL_TCP_ADD,
1182                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1183         if (ret != 0) {
1184                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1185                 return -1;
1186         }
1187
1188         return 0;
1189 }
1190
1191 /*
1192   find a tcp address on a list
1193  */
1194 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1195                                            struct ctdb_tcp_connection *tcp)
1196 {
1197         int i;
1198
1199         if (array == NULL) {
1200                 return NULL;
1201         }
1202
1203         for (i=0;i<array->num;i++) {
1204                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1205                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1206                         return &array->connections[i];
1207                 }
1208         }
1209         return NULL;
1210 }
1211
1212 /*
1213   called by a daemon to inform us of a TCP connection that one of its
1214   clients managing that should tickled with an ACK when IP takeover is
1215   done
1216  */
1217 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1218 {
1219         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1220         struct ctdb_tcp_array *tcparray;
1221         struct ctdb_tcp_connection tcp;
1222         struct ctdb_vnn *vnn;
1223
1224         vnn = find_public_ip_vnn(ctdb, &p->dest);
1225         if (vnn == NULL) {
1226                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1227                         ctdb_addr_to_str(&p->dest)));
1228
1229                 return -1;
1230         }
1231
1232
1233         tcparray = vnn->tcp_array;
1234
1235         /* If this is the first tickle */
1236         if (tcparray == NULL) {
1237                 tcparray = talloc_size(ctdb->nodes, 
1238                         offsetof(struct ctdb_tcp_array, connections) +
1239                         sizeof(struct ctdb_tcp_connection) * 1);
1240                 CTDB_NO_MEMORY(ctdb, tcparray);
1241                 vnn->tcp_array = tcparray;
1242
1243                 tcparray->num = 0;
1244                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1245                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1246
1247                 tcparray->connections[tcparray->num].src_addr = p->src;
1248                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1249                 tcparray->num++;
1250                 return 0;
1251         }
1252
1253
1254         /* Do we already have this tickle ?*/
1255         tcp.src_addr = p->src;
1256         tcp.dst_addr = p->dest;
1257         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1258                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1259                         ctdb_addr_to_str(&tcp.dst_addr),
1260                         ntohs(tcp.dst_addr.ip.sin_port),
1261                         vnn->pnn));
1262                 return 0;
1263         }
1264
1265         /* A new tickle, we must add it to the array */
1266         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1267                                         struct ctdb_tcp_connection,
1268                                         tcparray->num+1);
1269         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1270
1271         vnn->tcp_array = tcparray;
1272         tcparray->connections[tcparray->num].src_addr = p->src;
1273         tcparray->connections[tcparray->num].dst_addr = p->dest;
1274         tcparray->num++;
1275                                 
1276         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1277                 ctdb_addr_to_str(&tcp.dst_addr),
1278                 ntohs(tcp.dst_addr.ip.sin_port),
1279                 vnn->pnn));
1280
1281         return 0;
1282 }
1283
1284
1285 /*
1286   called by a daemon to inform us of a TCP connection that one of its
1287   clients managing that should tickled with an ACK when IP takeover is
1288   done
1289  */
1290 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1291 {
1292         struct ctdb_tcp_connection *tcpp;
1293         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1294
1295         if (vnn == NULL) {
1296                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1297                         ctdb_addr_to_str(&conn->dst_addr)));
1298                 return;
1299         }
1300
1301         /* if the array is empty we cant remove it
1302            and we dont need to do anything
1303          */
1304         if (vnn->tcp_array == NULL) {
1305                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1306                         ctdb_addr_to_str(&conn->dst_addr),
1307                         ntohs(conn->dst_addr.ip.sin_port)));
1308                 return;
1309         }
1310
1311
1312         /* See if we know this connection
1313            if we dont know this connection  then we dont need to do anything
1314          */
1315         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1316         if (tcpp == NULL) {
1317                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1318                         ctdb_addr_to_str(&conn->dst_addr),
1319                         ntohs(conn->dst_addr.ip.sin_port)));
1320                 return;
1321         }
1322
1323
1324         /* We need to remove this entry from the array.
1325            Instead of allocating a new array and copying data to it
1326            we cheat and just copy the last entry in the existing array
1327            to the entry that is to be removed and just shring the 
1328            ->num field
1329          */
1330         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1331         vnn->tcp_array->num--;
1332
1333         /* If we deleted the last entry we also need to remove the entire array
1334          */
1335         if (vnn->tcp_array->num == 0) {
1336                 talloc_free(vnn->tcp_array);
1337                 vnn->tcp_array = NULL;
1338         }               
1339
1340         vnn->tcp_update_needed = true;
1341
1342         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1343                 ctdb_addr_to_str(&conn->src_addr),
1344                 ntohs(conn->src_addr.ip.sin_port)));
1345 }
1346
1347
/*
  called when a daemon restarts.

  The intention is to send all tickles for all public addresses we are
  serving to the new node right away.  That is not implemented yet, so
  this is a stub which ignores its arguments and reports success.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        const int32_t status = 0;

        return status;
}
1357
1358
1359 /*
1360   called when a client structure goes away - hook to remove
1361   elements from the tcp_list in all daemons
1362  */
1363 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1364 {
1365         while (client->tcp_list) {
1366                 struct ctdb_tcp_list *tcp = client->tcp_list;
1367                 DLIST_REMOVE(client->tcp_list, tcp);
1368                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1369         }
1370 }
1371
1372
1373 /*
1374   release all IPs on shutdown
1375  */
1376 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1377 {
1378         struct ctdb_vnn *vnn;
1379
1380         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1381                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1382                         continue;
1383                 }
1384                 if (vnn->pnn == ctdb->pnn) {
1385                         vnn->pnn = -1;
1386                 }
1387                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1388                                   vnn->iface, 
1389                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1390                                   vnn->public_netmask_bits);
1391                 release_kill_clients(ctdb, &vnn->public_address);
1392         }
1393 }
1394
1395
1396 /*
1397   get list of public IPs
1398  */
1399 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1400                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1401 {
1402         int i, num, len;
1403         struct ctdb_all_public_ips *ips;
1404         struct ctdb_vnn *vnn;
1405
1406         /* count how many public ip structures we have */
1407         num = 0;
1408         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1409                 num++;
1410         }
1411
1412         len = offsetof(struct ctdb_all_public_ips, ips) + 
1413                 num*sizeof(struct ctdb_public_ip);
1414         ips = talloc_zero_size(outdata, len);
1415         CTDB_NO_MEMORY(ctdb, ips);
1416
1417         outdata->dsize = len;
1418         outdata->dptr  = (uint8_t *)ips;
1419
1420         ips->num = num;
1421         i = 0;
1422         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1423                 ips->ips[i].pnn  = vnn->pnn;
1424                 ips->ips[i].addr = vnn->public_address;
1425                 i++;
1426         }
1427
1428         return 0;
1429 }
1430
1431
1432 /*
1433   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1434  */
1435 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1436                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1437 {
1438         int i, num, len;
1439         struct ctdb_all_public_ipsv4 *ips;
1440         struct ctdb_vnn *vnn;
1441
1442         /* count how many public ip structures we have */
1443         num = 0;
1444         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1445                 if (vnn->public_address.sa.sa_family != AF_INET) {
1446                         continue;
1447                 }
1448                 num++;
1449         }
1450
1451         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1452                 num*sizeof(struct ctdb_public_ipv4);
1453         ips = talloc_zero_size(outdata, len);
1454         CTDB_NO_MEMORY(ctdb, ips);
1455
1456         outdata->dsize = len;
1457         outdata->dptr  = (uint8_t *)ips;
1458
1459         ips->num = num;
1460         i = 0;
1461         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1462                 if (vnn->public_address.sa.sa_family != AF_INET) {
1463                         continue;
1464                 }
1465                 ips->ips[i].pnn = vnn->pnn;
1466                 ips->ips[i].sin = vnn->public_address.ip;
1467                 i++;
1468         }
1469
1470         return 0;
1471 }
1472
1473
/* 
   structure containing the listening socket and the list of tcp connections
   that the ctdb daemon is to kill
*/
struct ctdb_kill_tcp {
        /* vnn this belongs to; the destructor clears vnn->killtcp */
        struct ctdb_vnn *vnn;
        /* daemon context; its event context is used to re-arm the tickle timer */
        struct ctdb_context *ctdb;
        /* socket that captured tcp packets are read from */
        int capture_fd;
        /* presumably the fd event watching capture_fd - TODO confirm, it is
           set up outside of the code visible here */
        struct fd_event *fde;
        /* tree of struct ctdb_killtcp_con, keyed by killtcp_key() */
        trbt_tree_t *connections;
        /* opaque pointer handed to ctdb_sys_read_tcp_packet() together with
           capture_fd */
        void *private_data;
};
1486
/*
  a tcp connection that is to be killed
 */
struct ctdb_killtcp_con {
        /* the two endpoints of the connection; tickles and the final reset
           are sent with dst_addr as the first and src_addr as the second
           address given to ctdb_sys_send_tcp() */
        ctdb_sock_addr src_addr;
        ctdb_sock_addr dst_addr;
        /* number of tickles sent so far; the connection is given up on
           once this has reached 5 */
        int count;
        /* presumably the ctdb_kill_tcp whose tree holds this entry - TODO
           confirm, it is assigned outside of the code visible here */
        struct ctdb_kill_tcp *killtcp;
};
1496
1497 /* this function is used to create a key to represent this socketpair
1498    in the killtcp tree.
1499    this key is used to insert and lookup matching socketpairs that are
1500    to be tickled and RST
1501 */
1502 #define KILLTCP_KEYLEN  10
1503 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1504 {
1505         static uint32_t key[KILLTCP_KEYLEN];
1506
1507         bzero(key, sizeof(key));
1508
1509         if (src->sa.sa_family != dst->sa.sa_family) {
1510                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1511                 return key;
1512         }
1513         
1514         switch (src->sa.sa_family) {
1515         case AF_INET:
1516                 key[0]  = dst->ip.sin_addr.s_addr;
1517                 key[1]  = src->ip.sin_addr.s_addr;
1518                 key[2]  = dst->ip.sin_port;
1519                 key[3]  = src->ip.sin_port;
1520                 break;
1521         case AF_INET6:
1522                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1523                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1524                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1525                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1526                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1527                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1528                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1529                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1530                 key[8]  = dst->ip6.sin6_port;
1531                 key[9]  = src->ip6.sin6_port;
1532                 break;
1533         default:
1534                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1535                 return key;
1536         }
1537
1538         return key;
1539 }
1540
1541 /*
1542   called when we get a read event on the raw socket
1543  */
1544 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1545                                 uint16_t flags, void *private_data)
1546 {
1547         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1548         struct ctdb_killtcp_con *con;
1549         ctdb_sock_addr src, dst;
1550         uint32_t ack_seq, seq;
1551
1552         if (!(flags & EVENT_FD_READ)) {
1553                 return;
1554         }
1555
1556         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1557                                 killtcp->private_data,
1558                                 &src, &dst,
1559                                 &ack_seq, &seq) != 0) {
1560                 /* probably a non-tcp ACK packet */
1561                 return;
1562         }
1563
1564         /* check if we have this guy in our list of connections
1565            to kill
1566         */
1567         con = trbt_lookuparray32(killtcp->connections, 
1568                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1569         if (con == NULL) {
1570                 /* no this was some other packet we can just ignore */
1571                 return;
1572         }
1573
1574         /* This one has been tickled !
1575            now reset him and remove him from the list.
1576          */
1577         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1578                 ntohs(con->dst_addr.ip.sin_port),
1579                 ctdb_addr_to_str(&con->src_addr),
1580                 ntohs(con->src_addr.ip.sin_port)));
1581
1582         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1583         talloc_free(con);
1584 }
1585
1586
1587 /* when traversing the list of all tcp connections to send tickle acks to
1588    (so that we can capture the ack coming back and kill the connection
1589     by a RST)
1590    this callback is called for each connection we are currently trying to kill
1591 */
1592 static void tickle_connection_traverse(void *param, void *data)
1593 {
1594         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1595
1596         /* have tried too many times, just give up */
1597         if (con->count >= 5) {
1598                 talloc_free(con);
1599                 return;
1600         }
1601
1602         /* othervise, try tickling it again */
1603         con->count++;
1604         ctdb_sys_send_tcp(
1605                 (ctdb_sock_addr *)&con->dst_addr,
1606                 (ctdb_sock_addr *)&con->src_addr,
1607                 0, 0, 0);
1608 }
1609
1610
1611 /* 
1612    called every second until all sentenced connections have been reset
1613  */
1614 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1615                                               struct timeval t, void *private_data)
1616 {
1617         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1618
1619
1620         /* loop over all connections sending tickle ACKs */
1621         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1622
1623
1624         /* If there are no more connections to kill we can remove the
1625            entire killtcp structure
1626          */
1627         if ( (killtcp->connections == NULL) || 
1628              (killtcp->connections->root == NULL) ) {
1629                 talloc_free(killtcp);
1630                 return;
1631         }
1632
1633         /* try tickling them again in a seconds time
1634          */
1635         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1636                         ctdb_tickle_sentenced_connections, killtcp);
1637 }
1638
1639 /*
1640   destroy the killtcp structure
1641  */
1642 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1643 {
1644         killtcp->vnn->killtcp = NULL;
1645         return 0;
1646 }
1647
1648
/* insert callback for the killtcp connection tree.

   nothing fancy here: whatever the node held before (data) is ignored and
   the new connection structure (parm) is unconditionally returned as the
   value to store.

   the old structure is deliberately not freed here; per the original
   author's note it is owned by the same talloc hierarchy and goes away
   together with the new data.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	void *replacement = parm;

	(void)data;

	return replacement;
}
1660
1661 /*
1662   add a tcp socket to the list of connections we want to RST
1663  */
1664 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1665                                        ctdb_sock_addr *s,
1666                                        ctdb_sock_addr *d)
1667 {
1668         ctdb_sock_addr src, dst;
1669         struct ctdb_kill_tcp *killtcp;
1670         struct ctdb_killtcp_con *con;
1671         struct ctdb_vnn *vnn;
1672
1673         ctdb_canonicalize_ip(s, &src);
1674         ctdb_canonicalize_ip(d, &dst);
1675
1676         vnn = find_public_ip_vnn(ctdb, &dst);
1677         if (vnn == NULL) {
1678                 vnn = find_public_ip_vnn(ctdb, &src);
1679         }
1680         if (vnn == NULL) {
1681                 /* if it is not a public ip   it could be our 'single ip' */
1682                 if (ctdb->single_ip_vnn) {
1683                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1684                                 vnn = ctdb->single_ip_vnn;
1685                         }
1686                 }
1687         }
1688         if (vnn == NULL) {
1689                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1690                 return -1;
1691         }
1692
1693         killtcp = vnn->killtcp;
1694         
1695         /* If this is the first connection to kill we must allocate
1696            a new structure
1697          */
1698         if (killtcp == NULL) {
1699                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1700                 CTDB_NO_MEMORY(ctdb, killtcp);
1701
1702                 killtcp->vnn         = vnn;
1703                 killtcp->ctdb        = ctdb;
1704                 killtcp->capture_fd  = -1;
1705                 killtcp->connections = trbt_create(killtcp, 0);
1706
1707                 vnn->killtcp         = killtcp;
1708                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1709         }
1710
1711
1712
1713         /* create a structure that describes this connection we want to
1714            RST and store it in killtcp->connections
1715         */
1716         con = talloc(killtcp, struct ctdb_killtcp_con);
1717         CTDB_NO_MEMORY(ctdb, con);
1718         con->src_addr = src;
1719         con->dst_addr = dst;
1720         con->count    = 0;
1721         con->killtcp  = killtcp;
1722
1723
1724         trbt_insertarray32_callback(killtcp->connections,
1725                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1726                         add_killtcp_callback, con);
1727
1728         /* 
1729            If we dont have a socket to listen on yet we must create it
1730          */
1731         if (killtcp->capture_fd == -1) {
1732                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1733                 if (killtcp->capture_fd == -1) {
1734                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1735                         goto failed;
1736                 }
1737         }
1738
1739
1740         if (killtcp->fde == NULL) {
1741                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1742                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1743                                             capture_tcp_handler, killtcp);
1744
1745                 /* We also need to set up some events to tickle all these connections
1746                    until they are all reset
1747                 */
1748                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1749                                 ctdb_tickle_sentenced_connections, killtcp);
1750         }
1751
1752         /* tickle him once now */
1753         ctdb_sys_send_tcp(
1754                 &con->dst_addr,
1755                 &con->src_addr,
1756                 0, 0, 0);
1757
1758         return 0;
1759
1760 failed:
1761         talloc_free(vnn->killtcp);
1762         vnn->killtcp = NULL;
1763         return -1;
1764 }
1765
1766 /*
1767   kill a TCP connection.
1768  */
1769 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1770 {
1771         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1772
1773         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1774 }
1775
1776 /*
1777   called by a daemon to inform us of the entire list of TCP tickles for
1778   a particular public address.
1779   this control should only be sent by the node that is currently serving
1780   that public address.
1781  */
1782 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1783 {
1784         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1785         struct ctdb_tcp_array *tcparray;
1786         struct ctdb_vnn *vnn;
1787
1788         /* We must at least have tickles.num or else we cant verify the size
1789            of the received data blob
1790          */
1791         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1792                                         tickles.connections)) {
1793                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1794                 return -1;
1795         }
1796
1797         /* verify that the size of data matches what we expect */
1798         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1799                                 tickles.connections)
1800                          + sizeof(struct ctdb_tcp_connection)
1801                                  * list->tickles.num) {
1802                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1803                 return -1;
1804         }       
1805
1806         vnn = find_public_ip_vnn(ctdb, &list->addr);
1807         if (vnn == NULL) {
1808                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1809                         ctdb_addr_to_str(&list->addr)));
1810
1811                 return 1;
1812         }
1813
1814         /* remove any old ticklelist we might have */
1815         talloc_free(vnn->tcp_array);
1816         vnn->tcp_array = NULL;
1817
1818         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1819         CTDB_NO_MEMORY(ctdb, tcparray);
1820
1821         tcparray->num = list->tickles.num;
1822
1823         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1824         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1825
1826         memcpy(tcparray->connections, &list->tickles.connections[0], 
1827                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1828
1829         /* We now have a new fresh tickle list array for this vnn */
1830         vnn->tcp_array = talloc_steal(vnn, tcparray);
1831         
1832         return 0;
1833 }
1834
1835 /*
1836   called to return the full list of tickles for the puclic address associated 
1837   with the provided vnn
1838  */
1839 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1840 {
1841         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1842         struct ctdb_control_tcp_tickle_list *list;
1843         struct ctdb_tcp_array *tcparray;
1844         int num;
1845         struct ctdb_vnn *vnn;
1846
1847         vnn = find_public_ip_vnn(ctdb, addr);
1848         if (vnn == NULL) {
1849                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1850                         ctdb_addr_to_str(addr)));
1851
1852                 return 1;
1853         }
1854
1855         tcparray = vnn->tcp_array;
1856         if (tcparray) {
1857                 num = tcparray->num;
1858         } else {
1859                 num = 0;
1860         }
1861
1862         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1863                                 tickles.connections)
1864                         + sizeof(struct ctdb_tcp_connection) * num;
1865
1866         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1867         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1868         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1869
1870         list->addr = *addr;
1871         list->tickles.num = num;
1872         if (num) {
1873                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1874                         sizeof(struct ctdb_tcp_connection) * num);
1875         }
1876
1877         return 0;
1878 }
1879
1880
1881 /*
1882   set the list of all tcp tickles for a public address
1883  */
1884 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1885                               struct timeval timeout, uint32_t destnode, 
1886                               ctdb_sock_addr *addr,
1887                               struct ctdb_tcp_array *tcparray)
1888 {
1889         int ret, num;
1890         TDB_DATA data;
1891         struct ctdb_control_tcp_tickle_list *list;
1892
1893         if (tcparray) {
1894                 num = tcparray->num;
1895         } else {
1896                 num = 0;
1897         }
1898
1899         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1900                                 tickles.connections) +
1901                         sizeof(struct ctdb_tcp_connection) * num;
1902         data.dptr = talloc_size(ctdb, data.dsize);
1903         CTDB_NO_MEMORY(ctdb, data.dptr);
1904
1905         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1906         list->addr = *addr;
1907         list->tickles.num = num;
1908         if (tcparray) {
1909                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1910         }
1911
1912         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1913                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1914                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1915         if (ret != 0) {
1916                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1917                 return -1;
1918         }
1919
1920         talloc_free(data.dptr);
1921
1922         return ret;
1923 }
1924
1925
1926 /*
1927   perform tickle updates if required
1928  */
1929 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1930                                 struct timed_event *te, 
1931                                 struct timeval t, void *private_data)
1932 {
1933         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1934         int ret;
1935         struct ctdb_vnn *vnn;
1936
1937         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1938                 /* we only send out updates for public addresses that 
1939                    we have taken over
1940                  */
1941                 if (ctdb->pnn != vnn->pnn) {
1942                         continue;
1943                 }
1944                 /* We only send out the updates if we need to */
1945                 if (!vnn->tcp_update_needed) {
1946                         continue;
1947                 }
1948                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1949                                 TAKEOVER_TIMEOUT(),
1950                                 CTDB_BROADCAST_CONNECTED,
1951                                 &vnn->public_address,
1952                                 vnn->tcp_array);
1953                 if (ret != 0) {
1954                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1955                                 ctdb_addr_to_str(&vnn->public_address)));
1956                 }
1957         }
1958
1959         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1960                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1961                              ctdb_update_tcp_tickles, ctdb);
1962 }               
1963         
1964
1965 /*
1966   start periodic update of tcp tickles
1967  */
1968 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1969 {
1970         ctdb->tickle_update_context = talloc_new(ctdb);
1971
1972         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1973                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1974                              ctdb_update_tcp_tickles, ctdb);
1975 }
1976
1977
1978
1979
1980 struct control_gratious_arp {
1981         struct ctdb_context *ctdb;
1982         ctdb_sock_addr addr;
1983         const char *iface;
1984         int count;
1985 };
1986
1987 /*
1988   send a control_gratuitous arp
1989  */
1990 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1991                                   struct timeval t, void *private_data)
1992 {
1993         int ret;
1994         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1995                                                         struct control_gratious_arp);
1996
1997         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1998         if (ret != 0) {
1999                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
2000         }
2001
2002
2003         arp->count++;
2004         if (arp->count == CTDB_ARP_REPEAT) {
2005                 talloc_free(arp);
2006                 return;
2007         }
2008
2009         event_add_timed(arp->ctdb->ev, arp, 
2010                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2011                         send_gratious_arp, arp);
2012 }
2013
2014
2015 /*
2016   send a gratious arp 
2017  */
2018 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2019 {
2020         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2021         struct control_gratious_arp *arp;
2022
2023         /* verify the size of indata */
2024         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2025                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2026                                  (unsigned)indata.dsize, 
2027                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2028                 return -1;
2029         }
2030         if (indata.dsize != 
2031                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2032                 + gratious_arp->len ) ){
2033
2034                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2035                         "but should be %u bytes\n", 
2036                          (unsigned)indata.dsize, 
2037                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2038                 return -1;
2039         }
2040
2041
2042         arp = talloc(ctdb, struct control_gratious_arp);
2043         CTDB_NO_MEMORY(ctdb, arp);
2044
2045         arp->ctdb  = ctdb;
2046         arp->addr   = gratious_arp->addr;
2047         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2048         CTDB_NO_MEMORY(ctdb, arp->iface);
2049         arp->count = 0;
2050         
2051         event_add_timed(arp->ctdb->ev, arp, 
2052                         timeval_zero(), send_gratious_arp, arp);
2053
2054         return 0;
2055 }
2056
2057 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2058 {
2059         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2060         int ret;
2061
2062         /* verify the size of indata */
2063         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2064                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2065                 return -1;
2066         }
2067         if (indata.dsize != 
2068                 ( offsetof(struct ctdb_control_ip_iface, iface)
2069                 + pub->len ) ){
2070
2071                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2072                         "but should be %u bytes\n", 
2073                          (unsigned)indata.dsize, 
2074                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2075                 return -1;
2076         }
2077
2078         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2079
2080         if (ret != 0) {
2081                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2082                 return -1;
2083         }
2084
2085         return 0;
2086 }
2087
2088 /*
2089   called when releaseip event finishes for del_public_address
2090  */
2091 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2092                                 void *private_data)
2093 {
2094         talloc_free(private_data);
2095 }
2096
2097 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2098 {
2099         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2100         struct ctdb_vnn *vnn;
2101         int ret;
2102
2103         /* verify the size of indata */
2104         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2105                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2106                 return -1;
2107         }
2108         if (indata.dsize != 
2109                 ( offsetof(struct ctdb_control_ip_iface, iface)
2110                 + pub->len ) ){
2111
2112                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2113                         "but should be %u bytes\n", 
2114                          (unsigned)indata.dsize, 
2115                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2116                 return -1;
2117         }
2118
2119         /* walk over all public addresses until we find a match */
2120         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2121                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2122                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2123
2124                         DLIST_REMOVE(ctdb->vnn, vnn);
2125
2126                         ret = ctdb_event_script_callback(ctdb, 
2127                                          mem_ctx, delete_ip_callback, mem_ctx,
2128                                          false,
2129                                          CTDB_EVENT_RELEASE_IP,
2130                                          "%s %s %u",
2131                                          vnn->iface, 
2132                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2133                                          vnn->public_netmask_bits);
2134                         talloc_free(vnn);
2135                         if (ret != 0) {
2136                                 return -1;
2137                         }
2138                         return 0;
2139                 }
2140         }
2141
2142         return -1;
2143 }
2144