Merge commit 'origin/master'
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* Timeout value built from the takeover_timeout tunable.  Expands to a
   call that reads a variable named `ctdb`, so one must be in scope
   wherever the macro is used. */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* First argument handed to timeval_current_ofs() when the gratuitous
   ARP timer is re-armed. */
#define CTDB_ARP_INTERVAL 1
/* The ARP timer handler stops re-arming itself once it has run this
   many times. */
#define CTDB_ARP_REPEAT   3
35
36 struct ctdb_takeover_arp {
37         struct ctdb_context *ctdb;
38         uint32_t count;
39         ctdb_sock_addr addr;
40         struct ctdb_tcp_array *tcparray;
41         struct ctdb_vnn *vnn;
42 };
43
44
45 /*
46   lists of tcp endpoints
47  */
48 struct ctdb_tcp_list {
49         struct ctdb_tcp_list *prev, *next;
50         struct ctdb_tcp_connection connection;
51 };
52
53 /*
54   list of clients to kill on IP release
55  */
56 struct ctdb_client_ip {
57         struct ctdb_client_ip *prev, *next;
58         struct ctdb_context *ctdb;
59         ctdb_sock_addr addr;
60         uint32_t client_id;
61 };
62
63
64 /*
65   send a gratuitous arp
66  */
67 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
68                                   struct timeval t, void *private_data)
69 {
70         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
71                                                         struct ctdb_takeover_arp);
72         int i, ret;
73         struct ctdb_tcp_array *tcparray;
74
75         ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
76         if (ret != 0) {
77                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
78         }
79
80         tcparray = arp->tcparray;
81         if (tcparray) {
82                 for (i=0;i<tcparray->num;i++) {
83                         struct ctdb_tcp_connection *tcon;
84
85                         tcon = &tcparray->connections[i];
86                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
87                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
88                                 ctdb_addr_to_str(&tcon->src_addr),
89                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
90                         ret = ctdb_sys_send_tcp(
91                                 &tcon->src_addr, 
92                                 &tcon->dst_addr,
93                                 0, 0, 0);
94                         if (ret != 0) {
95                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
96                                         ctdb_addr_to_str(&tcon->src_addr)));
97                         }
98                 }
99         }
100
101         arp->count++;
102
103         if (arp->count == CTDB_ARP_REPEAT) {
104                 talloc_free(arp);
105                 return;
106         }
107
108         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
109                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
110                         ctdb_control_send_arp, arp);
111 }
112
113 struct takeover_callback_state {
114         struct ctdb_req_control *c;
115         ctdb_sock_addr *addr;
116         struct ctdb_vnn *vnn;
117 };
118
119 /*
120   called when takeip event finishes
121  */
122 static void takeover_ip_callback(struct ctdb_context *ctdb, int status, 
123                                  void *private_data)
124 {
125         struct takeover_callback_state *state = 
126                 talloc_get_type(private_data, struct takeover_callback_state);
127         struct ctdb_takeover_arp *arp;
128         struct ctdb_tcp_array *tcparray;
129
130         if (status != 0) {
131                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
132                         ctdb_addr_to_str(state->addr),
133                         state->vnn->iface));
134                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
135                 talloc_free(state);
136                 return;
137         }
138
139         if (!state->vnn->takeover_ctx) {
140                 state->vnn->takeover_ctx = talloc_new(state->vnn);
141                 if (!state->vnn->takeover_ctx) {
142                         goto failed;
143                 }
144         }
145
146         arp = talloc_zero(state->vnn->takeover_ctx, struct ctdb_takeover_arp);
147         if (!arp) goto failed;
148         
149         arp->ctdb = ctdb;
150         arp->addr = *state->addr;
151         arp->vnn  = state->vnn;
152
153         tcparray = state->vnn->tcp_array;
154         if (tcparray) {
155                 /* add all of the known tcp connections for this IP to the
156                    list of tcp connections to send tickle acks for */
157                 arp->tcparray = talloc_steal(arp, tcparray);
158
159                 state->vnn->tcp_array = NULL;
160                 state->vnn->tcp_update_needed = true;
161         }
162
163         event_add_timed(arp->ctdb->ev, state->vnn->takeover_ctx, 
164                         timeval_zero(), ctdb_control_send_arp, arp);
165
166         /* the control succeeded */
167         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
168         talloc_free(state);
169         return;
170
171 failed:
172         ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
173         talloc_free(state);
174         return;
175 }
176
177 /*
178   Find the vnn of the node that has a public ip address
179   returns -1 if the address is not known as a public address
180  */
181 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
182 {
183         struct ctdb_vnn *vnn;
184
185         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
186                 if (ctdb_same_ip(&vnn->public_address, addr)) {
187                         return vnn;
188                 }
189         }
190
191         return NULL;
192 }
193
194
195 /*
196   take over an ip address
197  */
198 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
199                                  struct ctdb_req_control *c,
200                                  TDB_DATA indata, 
201                                  bool *async_reply)
202 {
203         int ret;
204         struct takeover_callback_state *state;
205         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
206         struct ctdb_vnn *vnn;
207
208         /* update out vnn list */
209         vnn = find_public_ip_vnn(ctdb, &pip->addr);
210         if (vnn == NULL) {
211                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n", 
212                         ctdb_addr_to_str(&pip->addr)));
213                 return 0;
214         }
215         vnn->pnn = pip->pnn;
216
217         /* if our kernel already has this IP, do nothing */
218         if (ctdb_sys_have_ip(&pip->addr)) {
219                 return 0;
220         }
221
222         state = talloc(vnn, struct takeover_callback_state);
223         CTDB_NO_MEMORY(ctdb, state);
224
225         state->c = talloc_steal(ctdb, c);
226         state->addr = talloc(ctdb, ctdb_sock_addr);
227         CTDB_NO_MEMORY(ctdb, state->addr);
228
229         *state->addr = pip->addr;
230         state->vnn   = vnn;
231
232         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", 
233                 ctdb_addr_to_str(&pip->addr),
234                 vnn->public_netmask_bits, 
235                 vnn->iface));
236
237         ret = ctdb_event_script_callback(ctdb, 
238                                          timeval_set(ctdb->tunable.script_timeout, 0),
239                                          state, takeover_ip_callback, state,
240                                          "takeip %s %s %u",
241                                          vnn->iface, 
242                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
243                                          vnn->public_netmask_bits);
244
245         if (ret != 0) {
246                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
247                         ctdb_addr_to_str(&pip->addr),
248                         vnn->iface));
249                 talloc_free(state);
250                 return -1;
251         }
252
253         /* tell ctdb_control.c that we will be replying asynchronously */
254         *async_reply = true;
255
256         return 0;
257 }
258
259 /*
260   takeover an ip address old v4 style
261  */
262 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
263                                 struct ctdb_req_control *c,
264                                 TDB_DATA indata, 
265                                 bool *async_reply)
266 {
267         TDB_DATA data;
268         
269         data.dsize = sizeof(struct ctdb_public_ip);
270         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
271         CTDB_NO_MEMORY(ctdb, data.dptr);
272         
273         memcpy(data.dptr, indata.dptr, indata.dsize);
274         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
275 }
276
277 /*
278   kill any clients that are registered with a IP that is being released
279  */
280 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
281 {
282         struct ctdb_client_ip *ip;
283
284         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
285                 ctdb_addr_to_str(addr)));
286
287         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
288                 ctdb_sock_addr tmp_addr;
289
290                 tmp_addr = ip->addr;
291                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
292                         ip->client_id,
293                         ctdb_addr_to_str(&ip->addr)));
294
295                 if (ctdb_same_ip(&tmp_addr, addr)) {
296                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
297                                                                      ip->client_id, 
298                                                                      struct ctdb_client);
299                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
300                                 ip->client_id,
301                                 ctdb_addr_to_str(&ip->addr),
302                                 client->pid));
303
304                         if (client->pid != 0) {
305                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
306                                         (unsigned)client->pid,
307                                         ctdb_addr_to_str(addr),
308                                         ip->client_id));
309                                 kill(client->pid, SIGKILL);
310                         }
311                 }
312         }
313 }
314
315 /*
316   called when releaseip event finishes
317  */
318 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
319                                 void *private_data)
320 {
321         struct takeover_callback_state *state = 
322                 talloc_get_type(private_data, struct takeover_callback_state);
323         TDB_DATA data;
324
325         /* send a message to all clients of this node telling them
326            that the cluster has been reconfigured and they should
327            release any sockets on this IP */
328         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
329         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
330         data.dsize = strlen((char *)data.dptr)+1;
331
332         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
333
334         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
335
336         /* kill clients that have registered with this IP */
337         release_kill_clients(ctdb, state->addr);
338         
339         /* the control succeeded */
340         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
341         talloc_free(state);
342 }
343
344 /*
345   release an ip address
346  */
347 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
348                                 struct ctdb_req_control *c,
349                                 TDB_DATA indata, 
350                                 bool *async_reply)
351 {
352         int ret;
353         struct takeover_callback_state *state;
354         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
355         struct ctdb_vnn *vnn;
356
357         /* update our vnn list */
358         vnn = find_public_ip_vnn(ctdb, &pip->addr);
359         if (vnn == NULL) {
360                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
361                         ctdb_addr_to_str(&pip->addr)));
362                 return 0;
363         }
364         vnn->pnn = pip->pnn;
365
366         /* stop any previous arps */
367         talloc_free(vnn->takeover_ctx);
368         vnn->takeover_ctx = NULL;
369
370         if (!ctdb_sys_have_ip(&pip->addr)) {
371                 DEBUG(DEBUG_NOTICE,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
372                         ctdb_addr_to_str(&pip->addr),
373                         vnn->public_netmask_bits, 
374                         vnn->iface));
375                 return 0;
376         }
377
378         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%u\n", 
379                 ctdb_addr_to_str(&pip->addr),
380                 vnn->public_netmask_bits, 
381                 vnn->iface,
382                 pip->pnn));
383
384         state = talloc(ctdb, struct takeover_callback_state);
385         CTDB_NO_MEMORY(ctdb, state);
386
387         state->c = talloc_steal(state, c);
388         state->addr = talloc(state, ctdb_sock_addr);       
389         CTDB_NO_MEMORY(ctdb, state->addr);
390         *state->addr = pip->addr;
391         state->vnn   = vnn;
392
393         ret = ctdb_event_script_callback(ctdb, 
394                                          timeval_set(ctdb->tunable.script_timeout, 0),
395                                          state, release_ip_callback, state,
396                                          "releaseip %s %s %u",
397                                          vnn->iface, 
398                                          talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
399                                          vnn->public_netmask_bits);
400         if (ret != 0) {
401                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
402                         ctdb_addr_to_str(&pip->addr),
403                         vnn->iface));
404                 talloc_free(state);
405                 return -1;
406         }
407
408         /* tell the control that we will be reply asynchronously */
409         *async_reply = true;
410         return 0;
411 }
412
413 /*
414   release an ip address old v4 style
415  */
416 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
417                                 struct ctdb_req_control *c,
418                                 TDB_DATA indata, 
419                                 bool *async_reply)
420 {
421         TDB_DATA data;
422         
423         data.dsize = sizeof(struct ctdb_public_ip);
424         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
425         CTDB_NO_MEMORY(ctdb, data.dptr);
426         
427         memcpy(data.dptr, indata.dptr, indata.dsize);
428         return ctdb_control_release_ip(ctdb, c, data, async_reply);
429 }
430
431
432 static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
433 {
434         struct ctdb_vnn      *vnn;
435
436         /* Verify that we dont have an entry for this ip yet */
437         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
438                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
439                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
440                                 ctdb_addr_to_str(addr)));
441                         return -1;
442                 }               
443         }
444
445         /* create a new vnn structure for this ip address */
446         vnn = talloc_zero(ctdb, struct ctdb_vnn);
447         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
448         vnn->iface = talloc_strdup(vnn, iface);
449         CTDB_NO_MEMORY(ctdb, vnn->iface);
450         vnn->public_address      = *addr;
451         vnn->public_netmask_bits = mask;
452         vnn->pnn                 = -1;
453         
454         DLIST_ADD(ctdb->vnn, vnn);
455
456         return 0;
457 }
458
459
460 /*
461   setup the event script directory
462 */
463 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
464 {
465         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
466         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
467         return 0;
468 }
469
470 /*
471   setup the public address lists from a file
472 */
473 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
474 {
475         char **lines;
476         int nlines;
477         int i;
478
479         lines = file_lines_load(alist, &nlines, ctdb);
480         if (lines == NULL) {
481                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
482                 return -1;
483         }
484         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
485                 nlines--;
486         }
487
488         for (i=0;i<nlines;i++) {
489                 unsigned mask;
490                 ctdb_sock_addr addr;
491                 const char *addrstr;
492                 const char *iface;
493                 char *tok, *line;
494
495                 line = lines[i];
496                 while ((*line == ' ') || (*line == '\t')) {
497                         line++;
498                 }
499                 if (*line == '#') {
500                         continue;
501                 }
502                 if (strcmp(line, "") == 0) {
503                         continue;
504                 }
505                 tok = strtok(line, " \t");
506                 addrstr = tok;
507                 tok = strtok(NULL, " \t");
508                 if (tok == NULL) {
509                         if (NULL == ctdb->default_public_interface) {
510                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
511                                          i+1));
512                                 talloc_free(lines);
513                                 return -1;
514                         }
515                         iface = ctdb->default_public_interface;
516                 } else {
517                         iface = tok;
518                 }
519
520                 if (!addrstr || !parse_ip_mask(addrstr, iface, &addr, &mask)) {
521                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
522                         talloc_free(lines);
523                         return -1;
524                 }
525                 if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
526                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
527                         talloc_free(lines);
528                         return -1;
529                 }
530         }
531
532         talloc_free(lines);
533         return 0;
534 }
535
536
537
538
539 struct ctdb_public_ip_list {
540         struct ctdb_public_ip_list *next;
541         uint32_t pnn;
542         ctdb_sock_addr addr;
543 };
544
545
546 /* Given a physical node, return the number of
547    public addresses that is currently assigned to this node.
548 */
549 static int node_ip_coverage(struct ctdb_context *ctdb, 
550         int32_t pnn,
551         struct ctdb_public_ip_list *ips)
552 {
553         int num=0;
554
555         for (;ips;ips=ips->next) {
556                 if (ips->pnn == pnn) {
557                         num++;
558                 }
559         }
560         return num;
561 }
562
563
564 /* Check if this is a public ip known to the node, i.e. can that
565    node takeover this ip ?
566 */
567 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
568                 struct ctdb_public_ip_list *ip)
569 {
570         struct ctdb_all_public_ips *public_ips;
571         int i;
572
573         public_ips = ctdb->nodes[pnn]->public_ips;
574
575         if (public_ips == NULL) {
576                 return -1;
577         }
578
579         for (i=0;i<public_ips->num;i++) {
580                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
581                         /* yes, this node can serve this public ip */
582                         return 0;
583                 }
584         }
585
586         return -1;
587 }
588
589
590 /* search the node lists list for a node to takeover this ip.
591    pick the node that currently are serving the least number of ips
592    so that the ips get spread out evenly.
593 */
594 static int find_takeover_node(struct ctdb_context *ctdb, 
595                 struct ctdb_node_map *nodemap, uint32_t mask, 
596                 struct ctdb_public_ip_list *ip,
597                 struct ctdb_public_ip_list *all_ips)
598 {
599         int pnn, min=0, num;
600         int i;
601
602         pnn    = -1;
603         for (i=0;i<nodemap->num;i++) {
604                 if (nodemap->nodes[i].flags & mask) {
605                         /* This node is not healty and can not be used to serve
606                            a public address 
607                         */
608                         continue;
609                 }
610
611                 /* verify that this node can serve this ip */
612                 if (can_node_serve_ip(ctdb, i, ip)) {
613                         /* no it couldnt   so skip to the next node */
614                         continue;
615                 }
616
617                 num = node_ip_coverage(ctdb, i, all_ips);
618                 /* was this the first node we checked ? */
619                 if (pnn == -1) {
620                         pnn = i;
621                         min  = num;
622                 } else {
623                         if (num < min) {
624                                 pnn = i;
625                                 min  = num;
626                         }
627                 }
628         }       
629         if (pnn == -1) {
630                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
631                         ctdb_addr_to_str(&ip->addr)));
632
633                 return -1;
634         }
635
636         ip->pnn = pnn;
637         return 0;
638 }
639
640 struct ctdb_public_ip_list *
641 add_ip_to_merged_list(struct ctdb_context *ctdb,
642                         TALLOC_CTX *tmp_ctx, 
643                         struct ctdb_public_ip_list *ip_list, 
644                         struct ctdb_public_ip *ip)
645 {
646         struct ctdb_public_ip_list *tmp_ip; 
647
648         /* do we already have this ip in our merged list ?*/
649         for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
650
651                 /* we already have this public ip in the list */
652                 if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
653                         return ip_list;
654                 }
655         }
656
657         /* this is a new public ip, we must add it to the list */
658         tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
659         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
660         tmp_ip->pnn  = ip->pnn;
661         tmp_ip->addr = ip->addr;
662         tmp_ip->next = ip_list;
663
664         return tmp_ip;
665 }
666
667 struct ctdb_public_ip_list *
668 create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
669 {
670         int i, j;
671         struct ctdb_public_ip_list *ip_list = NULL;
672         struct ctdb_all_public_ips *public_ips;
673
674         for (i=0;i<ctdb->num_nodes;i++) {
675                 public_ips = ctdb->nodes[i]->public_ips;
676
677                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
678                         continue;
679                 }
680
681                 /* there were no public ips for this node */
682                 if (public_ips == NULL) {
683                         continue;
684                 }               
685
686                 for (j=0;j<public_ips->num;j++) {
687                         ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
688                                         ip_list, &public_ips->ips[j]);
689                 }
690         }
691
692         return ip_list;
693 }
694
695 /*
696   make any IP alias changes for public addresses that are necessary 
697  */
698 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
699 {
700         int i, num_healthy, retries;
701         struct ctdb_public_ip ip;
702         struct ctdb_public_ipv4 ipv4;
703         uint32_t mask;
704         struct ctdb_public_ip_list *all_ips, *tmp_ip;
705         int maxnode, maxnum=0, minnode, minnum=0, num;
706         TDB_DATA data;
707         struct timeval timeout;
708         struct client_async_data *async_data;
709         struct ctdb_client_control_state *state;
710         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
711
712
713         ZERO_STRUCT(ip);
714
715         /* Count how many completely healthy nodes we have */
716         num_healthy = 0;
717         for (i=0;i<nodemap->num;i++) {
718                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
719                         num_healthy++;
720                 }
721         }
722
723         if (num_healthy > 0) {
724                 /* We have healthy nodes, so only consider them for 
725                    serving public addresses
726                 */
727                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
728         } else {
729                 /* We didnt have any completely healthy nodes so
730                    use "disabled" nodes as a fallback
731                 */
732                 mask = NODE_FLAGS_INACTIVE;
733         }
734
735         /* since nodes only know about those public addresses that
736            can be served by that particular node, no single node has
737            a full list of all public addresses that exist in the cluster.
738            Walk over all node structures and create a merged list of
739            all public addresses that exist in the cluster.
740         */
741         all_ips = create_merged_ip_list(ctdb, tmp_ctx);
742
743         /* If we want deterministic ip allocations, i.e. that the ip addresses
744            will always be allocated the same way for a specific set of
745            available/unavailable nodes.
746         */
747         if (1 == ctdb->tunable.deterministic_public_ips) {              
748                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
749                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
750                         tmp_ip->pnn = i%nodemap->num;
751                 }
752         }
753
754
755         /* mark all public addresses with a masked node as being served by
756            node -1
757         */
758         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
759                 if (tmp_ip->pnn == -1) {
760                         continue;
761                 }
762                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
763                         tmp_ip->pnn = -1;
764                 }
765         }
766
767         /* verify that the assigned nodes can serve that public ip
768            and set it to -1 if not
769         */
770         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
771                 if (tmp_ip->pnn == -1) {
772                         continue;
773                 }
774                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
775                         /* this node can not serve this ip. */
776                         tmp_ip->pnn = -1;
777                 }
778         }
779
780
781         /* now we must redistribute all public addresses with takeover node
782            -1 among the nodes available
783         */
784         retries = 0;
785 try_again:
786         /* loop over all ip's and find a physical node to cover for 
787            each unassigned ip.
788         */
789         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
790                 if (tmp_ip->pnn == -1) {
791                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
792                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
793                                         ctdb_addr_to_str(&tmp_ip->addr)));
794                         }
795                 }
796         }
797
798         /* If we dont want ips to fail back after a node becomes healthy
799            again, we wont even try to reallocat the ip addresses so that
800            they are evenly spread out.
801            This can NOT be used at the same time as DeterministicIPs !
802         */
803         if (1 == ctdb->tunable.no_ip_failback) {
804                 if (1 == ctdb->tunable.deterministic_public_ips) {
805                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
806                 }
807                 goto finished;
808         }
809
810
811         /* now, try to make sure the ip adresses are evenly distributed
812            across the node.
813            for each ip address, loop over all nodes that can serve this
814            ip and make sure that the difference between the node
815            serving the most and the node serving the least ip's are not greater
816            than 1.
817         */
818         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
819                 if (tmp_ip->pnn == -1) {
820                         continue;
821                 }
822
823                 /* Get the highest and lowest number of ips's served by any 
824                    valid node which can serve this ip.
825                 */
826                 maxnode = -1;
827                 minnode = -1;
828                 for (i=0;i<nodemap->num;i++) {
829                         if (nodemap->nodes[i].flags & mask) {
830                                 continue;
831                         }
832
833                         /* only check nodes that can actually serve this ip */
834                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
835                                 /* no it couldnt   so skip to the next node */
836                                 continue;
837                         }
838
839                         num = node_ip_coverage(ctdb, i, all_ips);
840                         if (maxnode == -1) {
841                                 maxnode = i;
842                                 maxnum  = num;
843                         } else {
844                                 if (num > maxnum) {
845                                         maxnode = i;
846                                         maxnum  = num;
847                                 }
848                         }
849                         if (minnode == -1) {
850                                 minnode = i;
851                                 minnum  = num;
852                         } else {
853                                 if (num < minnum) {
854                                         minnode = i;
855                                         minnum  = num;
856                                 }
857                         }
858                 }
859                 if (maxnode == -1) {
860                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
861                                 ctdb_addr_to_str(&tmp_ip->addr)));
862
863                         continue;
864                 }
865
866                 /* If we want deterministic IPs then dont try to reallocate 
867                    them to spread out the load.
868                 */
869                 if (1 == ctdb->tunable.deterministic_public_ips) {
870                         continue;
871                 }
872
873                 /* if the spread between the smallest and largest coverage by
874                    a node is >=2 we steal one of the ips from the node with
875                    most coverage to even things out a bit.
876                    try to do this at most 5 times  since we dont want to spend
877                    too much time balancing the ip coverage.
878                 */
879                 if ( (maxnum > minnum+1)
880                   && (retries < 5) ){
881                         struct ctdb_public_ip_list *tmp;
882
883                         /* mark one of maxnode's vnn's as unassigned and try
884                            again
885                         */
886                         for (tmp=all_ips;tmp;tmp=tmp->next) {
887                                 if (tmp->pnn == maxnode) {
888                                         tmp->pnn = -1;
889                                         retries++;
890                                         goto try_again;
891                                 }
892                         }
893                 }
894         }
895
896
897         /* finished distributing the public addresses, now just send the 
898            info out to the nodes
899         */
900 finished:
901
902         /* at this point ->pnn is the node which will own each IP
903            or -1 if there is no node that can cover this ip
904         */
905
906         /* now tell all nodes to delete any alias that they should not
907            have.  This will be a NOOP on nodes that don't currently
908            hold the given alias */
909         async_data = talloc_zero(tmp_ctx, struct client_async_data);
910         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
911
912         for (i=0;i<nodemap->num;i++) {
913                 /* don't talk to unconnected nodes, but do talk to banned nodes */
914                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
915                         continue;
916                 }
917
918                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
919                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
920                                 /* This node should be serving this
921                                    vnn so dont tell it to release the ip
922                                 */
923                                 continue;
924                         }
925                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
926                                 ipv4.pnn = tmp_ip->pnn;
927                                 ipv4.sin = tmp_ip->addr.ip;
928
929                                 timeout = TAKEOVER_TIMEOUT();
930                                 data.dsize = sizeof(ipv4);
931                                 data.dptr  = (uint8_t *)&ipv4;
932                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
933                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
934                                                 data, async_data,
935                                                 &timeout, NULL);
936                         } else {
937                                 ip.pnn  = tmp_ip->pnn;
938                                 ip.addr = tmp_ip->addr;
939
940                                 timeout = TAKEOVER_TIMEOUT();
941                                 data.dsize = sizeof(ip);
942                                 data.dptr  = (uint8_t *)&ip;
943                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
944                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
945                                                 data, async_data,
946                                                 &timeout, NULL);
947                         }
948
949                         if (state == NULL) {
950                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
951                                 talloc_free(tmp_ctx);
952                                 return -1;
953                         }
954                 
955                         ctdb_client_async_add(async_data, state);
956                 }
957         }
958         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
959                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
960                 talloc_free(tmp_ctx);
961                 return -1;
962         }
963         talloc_free(async_data);
964
965
966         /* tell all nodes to get their own IPs */
967         async_data = talloc_zero(tmp_ctx, struct client_async_data);
968         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
969         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
970                 if (tmp_ip->pnn == -1) {
971                         /* this IP won't be taken over */
972                         continue;
973                 }
974
975                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
976                         ipv4.pnn = tmp_ip->pnn;
977                         ipv4.sin = tmp_ip->addr.ip;
978
979                         timeout = TAKEOVER_TIMEOUT();
980                         data.dsize = sizeof(ipv4);
981                         data.dptr  = (uint8_t *)&ipv4;
982                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
983                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
984                                         data, async_data,
985                                         &timeout, NULL);
986                 } else {
987                         ip.pnn  = tmp_ip->pnn;
988                         ip.addr = tmp_ip->addr;
989
990                         timeout = TAKEOVER_TIMEOUT();
991                         data.dsize = sizeof(ip);
992                         data.dptr  = (uint8_t *)&ip;
993                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
994                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
995                                         data, async_data,
996                                         &timeout, NULL);
997                 }
998                 if (state == NULL) {
999                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1000                         talloc_free(tmp_ctx);
1001                         return -1;
1002                 }
1003                 
1004                 ctdb_client_async_add(async_data, state);
1005         }
1006         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1007                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1008                 talloc_free(tmp_ctx);
1009                 return -1;
1010         }
1011
1012         talloc_free(tmp_ctx);
1013         return 0;
1014 }
1015
1016
1017 /*
1018   destroy a ctdb_client_ip structure
1019  */
1020 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1021 {
1022         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1023                 ctdb_addr_to_str(&ip->addr),
1024                 ntohs(ip->addr.ip.sin_port),
1025                 ip->client_id));
1026
1027         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1028         return 0;
1029 }
1030
1031 /*
1032   called by a client to inform us of a TCP connection that it is managing
1033   that should tickled with an ACK when IP takeover is done
1034   we handle both the old ipv4 style of packets as well as the new ipv4/6
1035   pdus.
1036  */
1037 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1038                                 TDB_DATA indata)
1039 {
1040         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1041         struct ctdb_control_tcp *old_addr = NULL;
1042         struct ctdb_control_tcp_addr new_addr;
1043         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1044         struct ctdb_tcp_list *tcp;
1045         struct ctdb_control_tcp_vnn t;
1046         int ret;
1047         TDB_DATA data;
1048         struct ctdb_client_ip *ip;
1049         struct ctdb_vnn *vnn;
1050         ctdb_sock_addr addr;
1051
1052         switch (indata.dsize) {
1053         case sizeof(struct ctdb_control_tcp):
1054                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1055                 ZERO_STRUCT(new_addr);
1056                 tcp_sock = &new_addr;
1057                 tcp_sock->src.ip  = old_addr->src;
1058                 tcp_sock->dest.ip = old_addr->dest;
1059                 break;
1060         case sizeof(struct ctdb_control_tcp_addr):
1061                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1062                 break;
1063         default:
1064                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1065                                  "to ctdb_control_tcp_client. size was %d but "
1066                                  "only allowed sizes are %lu and %lu\n",
1067                                  (int)indata.dsize,
1068                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1069                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1070                 return -1;
1071         }
1072
1073         addr = tcp_sock->src;
1074         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1075         addr = tcp_sock->dest;
1076         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1077
1078         ZERO_STRUCT(addr);
1079         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1080         vnn = find_public_ip_vnn(ctdb, &addr);
1081         if (vnn == NULL) {
1082                 switch (addr.sa.sa_family) {
1083                 case AF_INET:
1084                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1085                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1086                                         ctdb_addr_to_str(&addr)));
1087                         }
1088                         break;
1089                 case AF_INET6:
1090                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1091                                 ctdb_addr_to_str(&addr)));
1092                         break;
1093                 default:
1094                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1095                 }
1096
1097                 return 0;
1098         }
1099
1100         if (vnn->pnn != ctdb->pnn) {
1101                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1102                         ctdb_addr_to_str(&addr),
1103                         client_id, client->pid));
1104                 /* failing this call will tell smbd to die */
1105                 return -1;
1106         }
1107
1108         ip = talloc(client, struct ctdb_client_ip);
1109         CTDB_NO_MEMORY(ctdb, ip);
1110
1111         ip->ctdb      = ctdb;
1112         ip->addr      = addr;
1113         ip->client_id = client_id;
1114         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1115         DLIST_ADD(ctdb->client_ip_list, ip);
1116
1117         tcp = talloc(client, struct ctdb_tcp_list);
1118         CTDB_NO_MEMORY(ctdb, tcp);
1119
1120         tcp->connection.src_addr = tcp_sock->src;
1121         tcp->connection.dst_addr = tcp_sock->dest;
1122
1123         DLIST_ADD(client->tcp_list, tcp);
1124
1125         t.src  = tcp_sock->src;
1126         t.dest = tcp_sock->dest;
1127
1128         data.dptr = (uint8_t *)&t;
1129         data.dsize = sizeof(t);
1130
1131         switch (addr.sa.sa_family) {
1132         case AF_INET:
1133                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1134                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1135                         ctdb_addr_to_str(&tcp_sock->src),
1136                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1137                 break;
1138         case AF_INET6:
1139                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1140                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1141                         ctdb_addr_to_str(&tcp_sock->src),
1142                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1143                 break;
1144         default:
1145                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1146         }
1147
1148
1149         /* tell all nodes about this tcp connection */
1150         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1151                                        CTDB_CONTROL_TCP_ADD,
1152                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1153         if (ret != 0) {
1154                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1155                 return -1;
1156         }
1157
1158         return 0;
1159 }
1160
1161 /*
1162   find a tcp address on a list
1163  */
1164 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1165                                            struct ctdb_tcp_connection *tcp)
1166 {
1167         int i;
1168
1169         if (array == NULL) {
1170                 return NULL;
1171         }
1172
1173         for (i=0;i<array->num;i++) {
1174                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1175                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1176                         return &array->connections[i];
1177                 }
1178         }
1179         return NULL;
1180 }
1181
1182 /*
1183   called by a daemon to inform us of a TCP connection that one of its
1184   clients managing that should tickled with an ACK when IP takeover is
1185   done
1186  */
1187 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1188 {
1189         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1190         struct ctdb_tcp_array *tcparray;
1191         struct ctdb_tcp_connection tcp;
1192         struct ctdb_vnn *vnn;
1193
1194         vnn = find_public_ip_vnn(ctdb, &p->dest);
1195         if (vnn == NULL) {
1196                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1197                         ctdb_addr_to_str(&p->dest)));
1198
1199                 return -1;
1200         }
1201
1202
1203         tcparray = vnn->tcp_array;
1204
1205         /* If this is the first tickle */
1206         if (tcparray == NULL) {
1207                 tcparray = talloc_size(ctdb->nodes, 
1208                         offsetof(struct ctdb_tcp_array, connections) +
1209                         sizeof(struct ctdb_tcp_connection) * 1);
1210                 CTDB_NO_MEMORY(ctdb, tcparray);
1211                 vnn->tcp_array = tcparray;
1212
1213                 tcparray->num = 0;
1214                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1215                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1216
1217                 tcparray->connections[tcparray->num].src_addr = p->src;
1218                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1219                 tcparray->num++;
1220                 return 0;
1221         }
1222
1223
1224         /* Do we already have this tickle ?*/
1225         tcp.src_addr = p->src;
1226         tcp.dst_addr = p->dest;
1227         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1228                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1229                         ctdb_addr_to_str(&tcp.dst_addr),
1230                         ntohs(tcp.dst_addr.ip.sin_port),
1231                         vnn->pnn));
1232                 return 0;
1233         }
1234
1235         /* A new tickle, we must add it to the array */
1236         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1237                                         struct ctdb_tcp_connection,
1238                                         tcparray->num+1);
1239         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1240
1241         vnn->tcp_array = tcparray;
1242         tcparray->connections[tcparray->num].src_addr = p->src;
1243         tcparray->connections[tcparray->num].dst_addr = p->dest;
1244         tcparray->num++;
1245                                 
1246         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1247                 ctdb_addr_to_str(&tcp.dst_addr),
1248                 ntohs(tcp.dst_addr.ip.sin_port),
1249                 vnn->pnn));
1250
1251         return 0;
1252 }
1253
1254
1255 /*
1256   called by a daemon to inform us of a TCP connection that one of its
1257   clients managing that should tickled with an ACK when IP takeover is
1258   done
1259  */
1260 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1261 {
1262         struct ctdb_tcp_connection *tcpp;
1263         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1264
1265         if (vnn == NULL) {
1266                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1267                         ctdb_addr_to_str(&conn->dst_addr)));
1268                 return;
1269         }
1270
1271         /* if the array is empty we cant remove it
1272            and we dont need to do anything
1273          */
1274         if (vnn->tcp_array == NULL) {
1275                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1276                         ctdb_addr_to_str(&conn->dst_addr),
1277                         ntohs(conn->dst_addr.ip.sin_port)));
1278                 return;
1279         }
1280
1281
1282         /* See if we know this connection
1283            if we dont know this connection  then we dont need to do anything
1284          */
1285         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1286         if (tcpp == NULL) {
1287                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1288                         ctdb_addr_to_str(&conn->dst_addr),
1289                         ntohs(conn->dst_addr.ip.sin_port)));
1290                 return;
1291         }
1292
1293
1294         /* We need to remove this entry from the array.
1295            Instead of allocating a new array and copying data to it
1296            we cheat and just copy the last entry in the existing array
1297            to the entry that is to be removed and just shring the 
1298            ->num field
1299          */
1300         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1301         vnn->tcp_array->num--;
1302
1303         /* If we deleted the last entry we also need to remove the entire array
1304          */
1305         if (vnn->tcp_array->num == 0) {
1306                 talloc_free(vnn->tcp_array);
1307                 vnn->tcp_array = NULL;
1308         }               
1309
1310         vnn->tcp_update_needed = true;
1311
1312         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1313                 ctdb_addr_to_str(&conn->src_addr),
1314                 ntohs(conn->src_addr.ip.sin_port)));
1315 }
1316
1317
/*
  control handler stub for the notification that a daemon has
  (re)started.

  XXX the intention is to send all tickles for all public addresses we
  are serving to the new node right away; that is not implemented yet,
  so this currently does nothing and always reports success (0).
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	int32_t status = 0;

	return status;
}
1327
1328
1329 /*
1330   called when a client structure goes away - hook to remove
1331   elements from the tcp_list in all daemons
1332  */
1333 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1334 {
1335         while (client->tcp_list) {
1336                 struct ctdb_tcp_list *tcp = client->tcp_list;
1337                 DLIST_REMOVE(client->tcp_list, tcp);
1338                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1339         }
1340 }
1341
1342
1343 /*
1344   release all IPs on shutdown
1345  */
1346 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1347 {
1348         struct ctdb_vnn *vnn;
1349
1350         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1351                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1352                         continue;
1353                 }
1354                 if (vnn->pnn == ctdb->pnn) {
1355                         vnn->pnn = -1;
1356                 }
1357                 ctdb_event_script(ctdb, "releaseip %s %s %u",
1358                                   vnn->iface, 
1359                                   talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
1360                                   vnn->public_netmask_bits);
1361                 release_kill_clients(ctdb, &vnn->public_address);
1362         }
1363 }
1364
1365
1366 /*
1367   get list of public IPs
1368  */
1369 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1370                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1371 {
1372         int i, num, len;
1373         struct ctdb_all_public_ips *ips;
1374         struct ctdb_vnn *vnn;
1375
1376         /* count how many public ip structures we have */
1377         num = 0;
1378         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1379                 num++;
1380         }
1381
1382         len = offsetof(struct ctdb_all_public_ips, ips) + 
1383                 num*sizeof(struct ctdb_public_ip);
1384         ips = talloc_zero_size(outdata, len);
1385         CTDB_NO_MEMORY(ctdb, ips);
1386
1387         outdata->dsize = len;
1388         outdata->dptr  = (uint8_t *)ips;
1389
1390         ips->num = num;
1391         i = 0;
1392         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1393                 ips->ips[i].pnn  = vnn->pnn;
1394                 ips->ips[i].addr = vnn->public_address;
1395                 i++;
1396         }
1397
1398         return 0;
1399 }
1400
1401
1402 /*
1403   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1404  */
1405 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1406                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1407 {
1408         int i, num, len;
1409         struct ctdb_all_public_ipsv4 *ips;
1410         struct ctdb_vnn *vnn;
1411
1412         /* count how many public ip structures we have */
1413         num = 0;
1414         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1415                 if (vnn->public_address.sa.sa_family != AF_INET) {
1416                         continue;
1417                 }
1418                 num++;
1419         }
1420
1421         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1422                 num*sizeof(struct ctdb_public_ipv4);
1423         ips = talloc_zero_size(outdata, len);
1424         CTDB_NO_MEMORY(ctdb, ips);
1425
1426         outdata->dsize = len;
1427         outdata->dptr  = (uint8_t *)ips;
1428
1429         ips->num = num;
1430         i = 0;
1431         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1432                 if (vnn->public_address.sa.sa_family != AF_INET) {
1433                         continue;
1434                 }
1435                 ips->ips[i].pnn = vnn->pnn;
1436                 ips->ips[i].sin = vnn->public_address.ip;
1437                 i++;
1438         }
1439
1440         return 0;
1441 }
1442
1443
1444 /* 
1445    per-vnn state for killing tcp connections: the capture socket and
1446    the tree of tcp connections that the ctdb daemon is to kill
1447 */
1448 struct ctdb_kill_tcp {
1449         struct ctdb_vnn *vnn;          /* vnn this state belongs to; ctdb_killtcp_destructor clears its ->killtcp */
1450         struct ctdb_context *ctdb;     /* daemon context; its ->ev is used to re-arm the tickle timer */
1451         int capture_fd;                /* fd that capture_tcp_handler reads packets from */
1452         struct fd_event *fde;          /* presumably the read event registered on capture_fd - set up outside this view, confirm */
1453         trbt_tree_t *connections;      /* struct ctdb_killtcp_con entries, looked up with killtcp_key() */
1454         void *private_data;            /* opaque argument passed through to ctdb_sys_read_tcp_packet() */
1455 };
1456
1457 /*
1458   a tcp connection that is to be killed
1459  */
1460 struct ctdb_killtcp_con {
1461         ctdb_sock_addr src_addr;       /* one endpoint of the connection; tickles and resets are sent to it */
1462         ctdb_sock_addr dst_addr;       /* the other endpoint; used as the sender of tickles and resets */
1463         int count;                     /* tickles sent so far; tickle_connection_traverse gives up at 5 */
1464         struct ctdb_kill_tcp *killtcp; /* presumably the owning kill state - assigned outside this view, confirm */
1465 };
1466
1467 /* this function is used to create a key to represent this socketpair
1468    in the killtcp tree.
1469    this key is used to insert and lookup matching socketpairs that are
1470    to be tickled and RST
1471 */
1472 #define KILLTCP_KEYLEN  10
1473 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
1474 {
1475         static uint32_t key[KILLTCP_KEYLEN];
1476
1477         bzero(key, sizeof(key));
1478
1479         if (src->sa.sa_family != dst->sa.sa_family) {
1480                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
1481                 return key;
1482         }
1483         
1484         switch (src->sa.sa_family) {
1485         case AF_INET:
1486                 key[0]  = dst->ip.sin_addr.s_addr;
1487                 key[1]  = src->ip.sin_addr.s_addr;
1488                 key[2]  = dst->ip.sin_port;
1489                 key[3]  = src->ip.sin_port;
1490                 break;
1491         case AF_INET6:
1492                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
1493                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
1494                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
1495                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
1496                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
1497                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
1498                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
1499                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
1500                 key[8]  = dst->ip6.sin6_port;
1501                 key[9]  = src->ip6.sin6_port;
1502                 break;
1503         default:
1504                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
1505                 return key;
1506         }
1507
1508         return key;
1509 }
1510
1511 /*
1512   called when we get a read event on the raw socket
1513  */
1514 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
1515                                 uint16_t flags, void *private_data)
1516 {
1517         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1518         struct ctdb_killtcp_con *con;
1519         ctdb_sock_addr src, dst;
1520         uint32_t ack_seq, seq;
1521
1522         if (!(flags & EVENT_FD_READ)) {
1523                 return;
1524         }
1525
1526         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
1527                                 killtcp->private_data,
1528                                 &src, &dst,
1529                                 &ack_seq, &seq) != 0) {
1530                 /* probably a non-tcp ACK packet */
1531                 return;
1532         }
1533
1534         /* check if we have this guy in our list of connections
1535            to kill
1536         */
1537         con = trbt_lookuparray32(killtcp->connections, 
1538                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
1539         if (con == NULL) {
1540                 /* no this was some other packet we can just ignore */
1541                 return;
1542         }
1543
1544         /* This one has been tickled !
1545            now reset him and remove him from the list.
1546          */
1547         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
1548                 ntohs(con->dst_addr.ip.sin_port),
1549                 ctdb_addr_to_str(&con->src_addr),
1550                 ntohs(con->src_addr.ip.sin_port)));
1551
1552         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
1553         talloc_free(con);
1554 }
1555
1556
1557 /* when traversing the list of all tcp connections to send tickle acks to
1558    (so that we can capture the ack coming back and kill the connection
1559     by a RST)
1560    this callback is called for each connection we are currently trying to kill
1561 */
1562 static void tickle_connection_traverse(void *param, void *data)
1563 {
1564         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
1565
1566         /* have tried too many times, just give up */
1567         if (con->count >= 5) {
1568                 talloc_free(con);
1569                 return;
1570         }
1571
1572         /* othervise, try tickling it again */
1573         con->count++;
1574         ctdb_sys_send_tcp(
1575                 (ctdb_sock_addr *)&con->dst_addr,
1576                 (ctdb_sock_addr *)&con->src_addr,
1577                 0, 0, 0);
1578 }
1579
1580
1581 /* 
1582    called every second until all sentenced connections have been reset
1583  */
1584 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
1585                                               struct timeval t, void *private_data)
1586 {
1587         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
1588
1589
1590         /* loop over all connections sending tickle ACKs */
1591         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
1592
1593
1594         /* If there are no more connections to kill we can remove the
1595            entire killtcp structure
1596          */
1597         if ( (killtcp->connections == NULL) || 
1598              (killtcp->connections->root == NULL) ) {
1599                 talloc_free(killtcp);
1600                 return;
1601         }
1602
1603         /* try tickling them again in a seconds time
1604          */
1605         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1606                         ctdb_tickle_sentenced_connections, killtcp);
1607 }
1608
1609 /*
1610   destroy the killtcp structure
1611  */
1612 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
1613 {
1614         killtcp->vnn->killtcp = NULL;
1615         return 0;
1616 }
1617
1618
/* insert callback used when storing a connection in the killtcp tree.

   parm is the new connection structure and data is whatever was
   already stored under the same key (if anything).  we always pick the
   new one and never free the old one here; the old structure is left
   to be cleaned up together with the rest of the tree.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
1630
1631 /*
1632   add a tcp socket to the list of connections we want to RST
1633  */
1634 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
1635                                        ctdb_sock_addr *s,
1636                                        ctdb_sock_addr *d)
1637 {
1638         ctdb_sock_addr src, dst;
1639         struct ctdb_kill_tcp *killtcp;
1640         struct ctdb_killtcp_con *con;
1641         struct ctdb_vnn *vnn;
1642
1643         ctdb_canonicalize_ip(s, &src);
1644         ctdb_canonicalize_ip(d, &dst);
1645
1646         vnn = find_public_ip_vnn(ctdb, &dst);
1647         if (vnn == NULL) {
1648                 vnn = find_public_ip_vnn(ctdb, &src);
1649         }
1650         if (vnn == NULL) {
1651                 /* if it is not a public ip   it could be our 'single ip' */
1652                 if (ctdb->single_ip_vnn) {
1653                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
1654                                 vnn = ctdb->single_ip_vnn;
1655                         }
1656                 }
1657         }
1658         if (vnn == NULL) {
1659                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
1660                 return -1;
1661         }
1662
1663         killtcp = vnn->killtcp;
1664         
1665         /* If this is the first connection to kill we must allocate
1666            a new structure
1667          */
1668         if (killtcp == NULL) {
1669                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
1670                 CTDB_NO_MEMORY(ctdb, killtcp);
1671
1672                 killtcp->vnn         = vnn;
1673                 killtcp->ctdb        = ctdb;
1674                 killtcp->capture_fd  = -1;
1675                 killtcp->connections = trbt_create(killtcp, 0);
1676
1677                 vnn->killtcp         = killtcp;
1678                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
1679         }
1680
1681
1682
1683         /* create a structure that describes this connection we want to
1684            RST and store it in killtcp->connections
1685         */
1686         con = talloc(killtcp, struct ctdb_killtcp_con);
1687         CTDB_NO_MEMORY(ctdb, con);
1688         con->src_addr = src;
1689         con->dst_addr = dst;
1690         con->count    = 0;
1691         con->killtcp  = killtcp;
1692
1693
1694         trbt_insertarray32_callback(killtcp->connections,
1695                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
1696                         add_killtcp_callback, con);
1697
1698         /* 
1699            If we dont have a socket to listen on yet we must create it
1700          */
1701         if (killtcp->capture_fd == -1) {
1702                 killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn->iface, &killtcp->private_data);
1703                 if (killtcp->capture_fd == -1) {
1704                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing socket for killtcp\n"));
1705                         goto failed;
1706                 }
1707         }
1708
1709
1710         if (killtcp->fde == NULL) {
1711                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
1712                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
1713                                             capture_tcp_handler, killtcp);
1714
1715                 /* We also need to set up some events to tickle all these connections
1716                    until they are all reset
1717                 */
1718                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
1719                                 ctdb_tickle_sentenced_connections, killtcp);
1720         }
1721
1722         /* tickle him once now */
1723         ctdb_sys_send_tcp(
1724                 &con->dst_addr,
1725                 &con->src_addr,
1726                 0, 0, 0);
1727
1728         return 0;
1729
1730 failed:
1731         talloc_free(vnn->killtcp);
1732         vnn->killtcp = NULL;
1733         return -1;
1734 }
1735
1736 /*
1737   kill a TCP connection.
1738  */
1739 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
1740 {
1741         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
1742
1743         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
1744 }
1745
1746 /*
1747   called by a daemon to inform us of the entire list of TCP tickles for
1748   a particular public address.
1749   this control should only be sent by the node that is currently serving
1750   that public address.
1751  */
1752 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
1753 {
1754         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
1755         struct ctdb_tcp_array *tcparray;
1756         struct ctdb_vnn *vnn;
1757
1758         /* We must at least have tickles.num or else we cant verify the size
1759            of the received data blob
1760          */
1761         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1762                                         tickles.connections)) {
1763                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
1764                 return -1;
1765         }
1766
1767         /* verify that the size of data matches what we expect */
1768         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
1769                                 tickles.connections)
1770                          + sizeof(struct ctdb_tcp_connection)
1771                                  * list->tickles.num) {
1772                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
1773                 return -1;
1774         }       
1775
1776         vnn = find_public_ip_vnn(ctdb, &list->addr);
1777         if (vnn == NULL) {
1778                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
1779                         ctdb_addr_to_str(&list->addr)));
1780
1781                 return 1;
1782         }
1783
1784         /* remove any old ticklelist we might have */
1785         talloc_free(vnn->tcp_array);
1786         vnn->tcp_array = NULL;
1787
1788         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
1789         CTDB_NO_MEMORY(ctdb, tcparray);
1790
1791         tcparray->num = list->tickles.num;
1792
1793         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
1794         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1795
1796         memcpy(tcparray->connections, &list->tickles.connections[0], 
1797                sizeof(struct ctdb_tcp_connection)*tcparray->num);
1798
1799         /* We now have a new fresh tickle list array for this vnn */
1800         vnn->tcp_array = talloc_steal(vnn, tcparray);
1801         
1802         return 0;
1803 }
1804
1805 /*
1806   called to return the full list of tickles for the puclic address associated 
1807   with the provided vnn
1808  */
1809 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
1810 {
1811         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
1812         struct ctdb_control_tcp_tickle_list *list;
1813         struct ctdb_tcp_array *tcparray;
1814         int num;
1815         struct ctdb_vnn *vnn;
1816
1817         vnn = find_public_ip_vnn(ctdb, addr);
1818         if (vnn == NULL) {
1819                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
1820                         ctdb_addr_to_str(addr)));
1821
1822                 return 1;
1823         }
1824
1825         tcparray = vnn->tcp_array;
1826         if (tcparray) {
1827                 num = tcparray->num;
1828         } else {
1829                 num = 0;
1830         }
1831
1832         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1833                                 tickles.connections)
1834                         + sizeof(struct ctdb_tcp_connection) * num;
1835
1836         outdata->dptr  = talloc_size(outdata, outdata->dsize);
1837         CTDB_NO_MEMORY(ctdb, outdata->dptr);
1838         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
1839
1840         list->addr = *addr;
1841         list->tickles.num = num;
1842         if (num) {
1843                 memcpy(&list->tickles.connections[0], tcparray->connections, 
1844                         sizeof(struct ctdb_tcp_connection) * num);
1845         }
1846
1847         return 0;
1848 }
1849
1850
1851 /*
1852   set the list of all tcp tickles for a public address
1853  */
1854 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
1855                               struct timeval timeout, uint32_t destnode, 
1856                               ctdb_sock_addr *addr,
1857                               struct ctdb_tcp_array *tcparray)
1858 {
1859         int ret, num;
1860         TDB_DATA data;
1861         struct ctdb_control_tcp_tickle_list *list;
1862
1863         if (tcparray) {
1864                 num = tcparray->num;
1865         } else {
1866                 num = 0;
1867         }
1868
1869         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
1870                                 tickles.connections) +
1871                         sizeof(struct ctdb_tcp_connection) * num;
1872         data.dptr = talloc_size(ctdb, data.dsize);
1873         CTDB_NO_MEMORY(ctdb, data.dptr);
1874
1875         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
1876         list->addr = *addr;
1877         list->tickles.num = num;
1878         if (tcparray) {
1879                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
1880         }
1881
1882         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1883                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
1884                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1885         if (ret != 0) {
1886                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
1887                 return -1;
1888         }
1889
1890         talloc_free(data.dptr);
1891
1892         return ret;
1893 }
1894
1895
1896 /*
1897   perform tickle updates if required
1898  */
1899 static void ctdb_update_tcp_tickles(struct event_context *ev, 
1900                                 struct timed_event *te, 
1901                                 struct timeval t, void *private_data)
1902 {
1903         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
1904         int ret;
1905         struct ctdb_vnn *vnn;
1906
1907         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1908                 /* we only send out updates for public addresses that 
1909                    we have taken over
1910                  */
1911                 if (ctdb->pnn != vnn->pnn) {
1912                         continue;
1913                 }
1914                 /* We only send out the updates if we need to */
1915                 if (!vnn->tcp_update_needed) {
1916                         continue;
1917                 }
1918                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
1919                                 TAKEOVER_TIMEOUT(),
1920                                 CTDB_BROADCAST_CONNECTED,
1921                                 &vnn->public_address,
1922                                 vnn->tcp_array);
1923                 if (ret != 0) {
1924                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
1925                                 ctdb_addr_to_str(&vnn->public_address)));
1926                 }
1927         }
1928
1929         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1930                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1931                              ctdb_update_tcp_tickles, ctdb);
1932 }               
1933         
1934
1935 /*
1936   start periodic update of tcp tickles
1937  */
1938 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
1939 {
1940         ctdb->tickle_update_context = talloc_new(ctdb);
1941
1942         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
1943                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
1944                              ctdb_update_tcp_tickles, ctdb);
1945 }
1946
1947
1948
1949
1950 struct control_gratious_arp {
1951         struct ctdb_context *ctdb;
1952         ctdb_sock_addr addr;
1953         const char *iface;
1954         int count;
1955 };
1956
1957 /*
1958   send a control_gratuitous arp
1959  */
1960 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
1961                                   struct timeval t, void *private_data)
1962 {
1963         int ret;
1964         struct control_gratious_arp *arp = talloc_get_type(private_data, 
1965                                                         struct control_gratious_arp);
1966
1967         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
1968         if (ret != 0) {
1969                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp failed (%s)\n", strerror(errno)));
1970         }
1971
1972
1973         arp->count++;
1974         if (arp->count == CTDB_ARP_REPEAT) {
1975                 talloc_free(arp);
1976                 return;
1977         }
1978
1979         event_add_timed(arp->ctdb->ev, arp, 
1980                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
1981                         send_gratious_arp, arp);
1982 }
1983
1984
1985 /*
1986   send a gratious arp 
1987  */
1988 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
1989 {
1990         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
1991         struct control_gratious_arp *arp;
1992
1993         /* verify the size of indata */
1994         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
1995                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
1996                                  (unsigned)indata.dsize, 
1997                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
1998                 return -1;
1999         }
2000         if (indata.dsize != 
2001                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2002                 + gratious_arp->len ) ){
2003
2004                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2005                         "but should be %u bytes\n", 
2006                          (unsigned)indata.dsize, 
2007                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2008                 return -1;
2009         }
2010
2011
2012         arp = talloc(ctdb, struct control_gratious_arp);
2013         CTDB_NO_MEMORY(ctdb, arp);
2014
2015         arp->ctdb  = ctdb;
2016         arp->addr   = gratious_arp->addr;
2017         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2018         CTDB_NO_MEMORY(ctdb, arp->iface);
2019         arp->count = 0;
2020         
2021         event_add_timed(arp->ctdb->ev, arp, 
2022                         timeval_zero(), send_gratious_arp, arp);
2023
2024         return 0;
2025 }
2026
2027 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2028 {
2029         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2030         int ret;
2031
2032         /* verify the size of indata */
2033         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2034                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2035                 return -1;
2036         }
2037         if (indata.dsize != 
2038                 ( offsetof(struct ctdb_control_ip_iface, iface)
2039                 + pub->len ) ){
2040
2041                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2042                         "but should be %u bytes\n", 
2043                          (unsigned)indata.dsize, 
2044                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2045                 return -1;
2046         }
2047
2048         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2049
2050         if (ret != 0) {
2051                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2052                 return -1;
2053         }
2054
2055         return 0;
2056 }
2057
2058 /*
2059   called when releaseip event finishes for del_public_address
2060  */
2061 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2062                                 void *private_data)
2063 {
2064         talloc_free(private_data);
2065 }
2066
2067 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2068 {
2069         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2070         struct ctdb_vnn *vnn;
2071         int ret;
2072
2073         /* verify the size of indata */
2074         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2075                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2076                 return -1;
2077         }
2078         if (indata.dsize != 
2079                 ( offsetof(struct ctdb_control_ip_iface, iface)
2080                 + pub->len ) ){
2081
2082                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2083                         "but should be %u bytes\n", 
2084                          (unsigned)indata.dsize, 
2085                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2086                 return -1;
2087         }
2088
2089         /* walk over all public addresses until we find a match */
2090         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2091                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2092                         TALLOC_CTX *mem_ctx = talloc_new(ctdb);
2093
2094                         DLIST_REMOVE(ctdb->vnn, vnn);
2095
2096                         ret = ctdb_event_script_callback(ctdb, 
2097                                          timeval_set(ctdb->tunable.script_timeout, 0),
2098                                          mem_ctx, delete_ip_callback, mem_ctx,
2099                                          "releaseip %s %s %u",
2100                                          vnn->iface, 
2101                                          talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
2102                                          vnn->public_netmask_bits);
2103                         talloc_free(vnn);
2104                         if (ret != 0) {
2105                                 return -1;
2106                         }
2107                         return 0;
2108                 }
2109         }
2110
2111         return -1;
2112 }
2113